Rename the 'cretonne' crate to 'cretonne-codegen'.

This fixes the next part of #287.
Dan Gohman
2018-04-17 08:48:02 -07:00
parent 7767186dd0
commit 24fa169e1f
254 changed files with 265 additions and 264 deletions


@@ -0,0 +1,131 @@
//! Value affinity for register allocation.
//!
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
//! of allocation that is likely to require the fewest fixup moves to satisfy instruction operand
//! constraints.
//!
//! For values that want to be in registers, the affinity hint includes a register class or
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
//! larger register class instead.
use ir::{AbiParam, ArgumentLoc};
use isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
use std::fmt;
/// Preferred register allocation for an SSA value.
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
/// No affinity.
///
/// This indicates a value that is not defined or used by any real instructions. It is a ghost
/// value that won't appear in the final program.
None,
/// This value should be placed in a spill slot on the stack.
Stack,
/// This value prefers a register from the given register class.
Reg(RegClassIndex),
}
impl Default for Affinity {
fn default() -> Self {
Affinity::None
}
}
impl Affinity {
/// Create an affinity that satisfies a single constraint.
///
/// This will never create an `Affinity::None`.
/// Use the `Default` implementation for that.
pub fn new(constraint: &OperandConstraint) -> Affinity {
if constraint.kind == ConstraintKind::Stack {
Affinity::Stack
} else {
Affinity::Reg(constraint.regclass.into())
}
}
/// Create an affinity that matches an ABI argument for `isa`.
pub fn abi(arg: &AbiParam, isa: &TargetIsa) -> Affinity {
match arg.location {
ArgumentLoc::Unassigned => Affinity::None,
ArgumentLoc::Reg(_) => Affinity::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
ArgumentLoc::Stack(_) => Affinity::Stack,
}
}
/// Is this the `None` affinity?
pub fn is_none(self) -> bool {
match self {
Affinity::None => true,
_ => false,
}
}
/// Is this the `Reg` affinity?
pub fn is_reg(self) -> bool {
match self {
Affinity::Reg(_) => true,
_ => false,
}
}
/// Is this the `Stack` affinity?
pub fn is_stack(self) -> bool {
match self {
Affinity::Stack => true,
_ => false,
}
}
/// Merge an operand constraint into this affinity.
///
/// Note that this does not guarantee that the register allocator will pick a register that
/// satisfies the constraint.
pub fn merge(&mut self, constraint: &OperandConstraint, reg_info: &RegInfo) {
match *self {
Affinity::None => *self = Affinity::new(constraint),
Affinity::Reg(rc) => {
// If the preferred register class is a subclass of the constraint, there's no need
// to change anything.
if constraint.kind != ConstraintKind::Stack &&
!constraint.regclass.has_subclass(rc)
{
// If the register classes don't overlap, `intersect` returns `None`, and we
// just keep our previous affinity.
if let Some(subclass) = constraint.regclass.intersect_index(reg_info.rc(rc)) {
// This constraint shrinks our preferred register class.
*self = Affinity::Reg(subclass);
}
}
}
Affinity::Stack => {}
}
}
/// Return an object that can display this value affinity, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
DisplayAffinity(self, regs.into())
}
}
/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAffinity<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Affinity::None => write!(f, "none"),
Affinity::Stack => write!(f, "stack"),
Affinity::Reg(rci) => {
match self.1 {
Some(regs) => write!(f, "{}", regs.rc(rci)),
None => write!(f, "{}", rci),
}
}
}
}
}
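
As a small usage sketch (hypothetical, not part of this commit, and assumed to live inside this module so the items above are in scope), the `Default` implementation and the predicate methods behave like this:

#[cfg(test)]
mod tests {
    use super::*;
    use isa::RegInfo;

    #[test]
    fn default_and_predicates() {
        // The default affinity is the ghost-value case.
        let ghost = Affinity::default();
        assert!(ghost.is_none());
        assert!(!ghost.is_reg());
        assert!(!ghost.is_stack());

        // A stack affinity can be displayed without register info.
        let stack = Affinity::Stack;
        assert!(stack.is_stack());
        assert_eq!(stack.display(None::<&RegInfo>).to_string(), "stack");
    }
}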

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,159 @@
//! Register allocator context.
//!
//! The `Context` struct contains data structures that should be preserved across invocations of
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
//! avoids allocating data structures independently for each function being compiled.
use dominator_tree::DominatorTree;
use flowgraph::ControlFlowGraph;
use ir::Function;
use isa::TargetIsa;
use regalloc::coalescing::Coalescing;
use regalloc::coloring::Coloring;
use regalloc::live_value_tracker::LiveValueTracker;
use regalloc::liveness::Liveness;
use regalloc::reload::Reload;
use regalloc::spilling::Spilling;
use regalloc::virtregs::VirtRegs;
use result::CtonResult;
use timing;
use topo_order::TopoOrder;
use verifier::{verify_context, verify_cssa, verify_liveness, verify_locations};
/// Persistent memory allocations for register allocation.
pub struct Context {
liveness: Liveness,
virtregs: VirtRegs,
coalescing: Coalescing,
topo: TopoOrder,
tracker: LiveValueTracker,
spilling: Spilling,
reload: Reload,
coloring: Coloring,
}
impl Context {
/// Create a new context for register allocation.
///
/// This context should be reused for multiple functions in order to avoid repeated memory
/// allocations.
pub fn new() -> Self {
Self {
liveness: Liveness::new(),
virtregs: VirtRegs::new(),
coalescing: Coalescing::new(),
topo: TopoOrder::new(),
tracker: LiveValueTracker::new(),
spilling: Spilling::new(),
reload: Reload::new(),
coloring: Coloring::new(),
}
}
/// Clear all data structures in this context.
pub fn clear(&mut self) {
self.liveness.clear();
self.virtregs.clear();
self.coalescing.clear();
self.topo.clear();
self.tracker.clear();
self.spilling.clear();
self.reload.clear();
self.coloring.clear();
}
/// Allocate registers in `func`.
///
/// After register allocation, all values in `func` have been assigned to a register or stack
/// location that is consistent with instruction encoding constraints.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
cfg: &ControlFlowGraph,
domtree: &mut DominatorTree,
) -> CtonResult {
let _tt = timing::regalloc();
debug_assert!(domtree.is_valid());
// `Liveness` and `Coloring` are self-clearing.
self.virtregs.clear();
// Tracker state (dominator live sets) is actually reused between the spilling and coloring
// phases.
self.tracker.clear();
// Pass: Liveness analysis.
self.liveness.compute(isa, func, cfg);
if isa.flags().enable_verifier() {
verify_liveness(isa, func, cfg, &self.liveness)?;
}
// Pass: Coalesce and create Conventional SSA form.
self.coalescing.conventional_ssa(
isa,
func,
cfg,
domtree,
&mut self.liveness,
&mut self.virtregs,
);
if isa.flags().enable_verifier() {
verify_context(func, cfg, domtree, isa)?;
verify_liveness(isa, func, cfg, &self.liveness)?;
verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
}
// Pass: Spilling.
self.spilling.run(
isa,
func,
domtree,
&mut self.liveness,
&self.virtregs,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
verify_context(func, cfg, domtree, isa)?;
verify_liveness(isa, func, cfg, &self.liveness)?;
verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
}
// Pass: Reload.
self.reload.run(
isa,
func,
domtree,
&mut self.liveness,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
verify_context(func, cfg, domtree, isa)?;
verify_liveness(isa, func, cfg, &self.liveness)?;
verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
}
// Pass: Coloring.
self.coloring.run(
isa,
func,
domtree,
&mut self.liveness,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
verify_context(func, cfg, domtree, isa)?;
verify_liveness(isa, func, cfg, &self.liveness)?;
verify_locations(isa, func, Some(&self.liveness))?;
verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
}
Ok(())
}
}
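
The comment on `new()` above is the key usage constraint: one `Context` is meant to be reused across functions. Below is a minimal driver sketch (hypothetical, not part of this commit; it assumes the usual `with_function` constructors for the CFG and dominator tree, and reuses the imports at the top of this file):

fn regalloc_all(isa: &TargetIsa, funcs: &mut [Function]) -> CtonResult {
    // Allocate the context once and reuse its data structures for every function.
    let mut ctx = Context::new();
    for func in funcs {
        let cfg = ControlFlowGraph::with_function(func);
        let mut domtree = DominatorTree::with_function(func, &cfg);
        ctx.run(isa, func, &cfg, &mut domtree)?;
    }
    Ok(())
}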


@@ -0,0 +1,215 @@
//! Register diversions.
//!
//! Normally, a value is assigned to a single register or stack location by the register allocator.
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
//! instruction constraints.
//!
//! These register diversions are local to an EBB. No values can be diverted when entering a new
//! EBB.
use ir::{InstructionData, Opcode};
use ir::{StackSlot, Value, ValueLoc, ValueLocations};
use isa::{RegInfo, RegUnit};
use std::fmt;
use std::vec::Vec;
/// A diversion of a value from its original location to a new register or stack location.
///
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
/// same value.
///
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
/// the current one.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Diversion {
/// The value that is diverted.
pub value: Value,
/// The original value location.
pub from: ValueLoc,
/// The current value location.
pub to: ValueLoc,
}
impl Diversion {
/// Make a new diversion.
pub fn new(value: Value, from: ValueLoc, to: ValueLoc) -> Diversion {
debug_assert!(from.is_assigned() && to.is_assigned());
Diversion { value, from, to }
}
}
/// Keep track of diversions in an EBB.
pub struct RegDiversions {
current: Vec<Diversion>,
}
impl RegDiversions {
/// Create a new empty diversion tracker.
pub fn new() -> Self {
Self { current: Vec::new() }
}
/// Clear the tracker, preparing for a new EBB.
pub fn clear(&mut self) {
self.current.clear()
}
/// Are there any diversions?
pub fn is_empty(&self) -> bool {
self.current.is_empty()
}
/// Get the current diversion of `value`, if any.
pub fn diversion(&self, value: Value) -> Option<&Diversion> {
self.current.iter().find(|d| d.value == value)
}
/// Get all current diversions.
pub fn all(&self) -> &[Diversion] {
self.current.as_slice()
}
/// Get the current location for `value`. Fall back to the assignment map for non-diverted
/// values.
pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
match self.diversion(value) {
Some(d) => d.to,
None => locations[value],
}
}
/// Get the current register location for `value`, or panic if `value` isn't in a register.
pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
self.get(value, locations).unwrap_reg()
}
/// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
self.get(value, locations).unwrap_stack()
}
/// Record any kind of move.
///
/// The `from` location must match an existing `to` location, if any.
pub fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
debug_assert!(from.is_assigned() && to.is_assigned());
if let Some(i) = self.current.iter().position(|d| d.value == value) {
debug_assert_eq!(self.current[i].to, from, "Bad regmove chain for {}", value);
if self.current[i].from != to {
self.current[i].to = to;
} else {
self.current.swap_remove(i);
}
} else {
self.current.push(Diversion::new(value, from, to));
}
}
/// Record a register -> register move.
pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
}
/// Record a register -> stack move.
pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
}
/// Record a stack -> register move.
pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
}
/// Apply the effect of `inst`.
///
/// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
/// match.
pub fn apply(&mut self, inst: &InstructionData) {
match *inst {
InstructionData::RegMove {
opcode: Opcode::Regmove,
arg,
src,
dst,
} => self.regmove(arg, src, dst),
InstructionData::RegSpill {
opcode: Opcode::Regspill,
arg,
src,
dst,
} => self.regspill(arg, src, dst),
InstructionData::RegFill {
opcode: Opcode::Regfill,
arg,
src,
dst,
} => self.regfill(arg, src, dst),
_ => {}
}
}
/// Drop any recorded move for `value`.
///
/// Returns the `to` location of the removed diversion.
pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
self.current.iter().position(|d| d.value == value).map(
|i| {
self.current.swap_remove(i).to
},
)
}
/// Return an object that can display the diversions.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
DisplayDiversions(self, regs.into())
}
}
/// Object that displays register diversions.
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayDiversions<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{")?;
for div in self.0.all() {
write!(
f,
" {}: {} -> {}",
div.value,
div.from.display(self.1),
div.to.display(self.1)
)?
}
write!(f, " }}")
}
}
#[cfg(test)]
mod tests {
use super::*;
use entity::EntityRef;
use ir::Value;
#[test]
fn inserts() {
let mut divs = RegDiversions::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
divs.regmove(v1, 10, 12);
assert_eq!(
divs.diversion(v1),
Some(&Diversion {
value: v1,
from: ValueLoc::Reg(10),
to: ValueLoc::Reg(12),
})
);
assert_eq!(divs.diversion(v2), None);
divs.regmove(v1, 12, 11);
assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
divs.regmove(v1, 11, 10);
assert_eq!(divs.diversion(v1), None);
}
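    // A further sketch (hypothetical, not part of this commit): a `regspill`
    // followed by the matching `regfill` collapses the regmove chain, so the
    // diversion disappears. Register unit 10 and stack slot 0 are made up.
    #[test]
    fn spill_then_fill_cancels() {
        let mut divs = RegDiversions::new();
        let v1 = Value::new(1);
        let ss0 = StackSlot::new(0);

        // Divert v1 from register unit 10 into a stack slot.
        divs.regspill(v1, 10, ss0);
        assert_eq!(
            divs.diversion(v1),
            Some(&Diversion {
                value: v1,
                from: ValueLoc::Reg(10),
                to: ValueLoc::Stack(ss0),
            })
        );

        // Filling it back into the same register cancels the diversion.
        divs.regfill(v1, ss0, 10);
        assert_eq!(divs.diversion(v1), None);
    }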
}


@@ -0,0 +1,348 @@
//! Track which values are live in an EBB with instruction granularity.
//!
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in an EBB.
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
//! instruction, starting at the EBB header.
use dominator_tree::DominatorTree;
use entity::{EntityList, ListPool};
use ir::{DataFlowGraph, Ebb, ExpandedProgramPoint, Inst, Layout, Value};
use partition_slice::partition_slice;
use regalloc::affinity::Affinity;
use regalloc::liveness::Liveness;
use regalloc::liverange::LiveRange;
use std::collections::HashMap;
use std::vec::Vec;
type ValueList = EntityList<Value>;
/// Compute and track live values throughout an EBB.
pub struct LiveValueTracker {
/// The set of values that are live at the current program point.
live: LiveValueVec,
/// Saved set of live values for every jump and branch that can potentially be an immediate
/// dominator of an EBB.
///
/// This is the set of values that are live *before* the branch.
idom_sets: HashMap<Inst, ValueList>,
/// Memory pool for the live sets.
idom_pool: ListPool<Value>,
}
/// Information about a value that is live at the current program point.
#[derive(Debug)]
pub struct LiveValue {
/// The live value.
pub value: Value,
/// The local ending point of the live range in the current EBB, as returned by
/// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
pub endpoint: Inst,
/// The affinity of the value as represented in its `LiveRange`.
///
/// This value is simply a copy of the affinity stored in the live range. We copy it because
/// almost all users of `LiveValue` need to look at it.
pub affinity: Affinity,
/// The live range for this value never leaves its EBB.
pub is_local: bool,
/// This value is dead - the live range ends immediately.
pub is_dead: bool,
}
struct LiveValueVec {
/// The set of values that are live at the current program point.
values: Vec<LiveValue>,
/// How many values at the front of `values` are known to be live after `inst`?
///
/// This is used to pass a much smaller slice to `partition_slice` when it's called a second
/// time for the same instruction.
live_prefix: Option<(Inst, usize)>,
}
impl LiveValueVec {
fn new() -> Self {
Self {
values: Vec::new(),
live_prefix: None,
}
}
/// Add a new live value to `values`. Copy some properties from `lr`.
fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
self.values.push(LiveValue {
value,
endpoint,
affinity: lr.affinity,
is_local: lr.is_local(),
is_dead: lr.is_dead(),
});
}
/// Remove all elements.
fn clear(&mut self) {
self.values.clear();
self.live_prefix = None;
}
/// Make sure that the values killed by `next_inst` are moved to the end of the `values`
/// vector.
///
/// Returns the number of values that will be live after `next_inst`.
fn live_after(&mut self, next_inst: Inst) -> usize {
// How many values at the front of the vector are already known to survive `next_inst`?
// We don't need to pass this prefix to `partition_slice()`
let keep = match self.live_prefix {
Some((i, prefix)) if i == next_inst => prefix,
_ => 0,
};
// Move the remaining surviving values to the front partition of the vector.
let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
// Remember the new prefix length in case we get called again for the same `next_inst`.
self.live_prefix = Some((next_inst, prefix));
prefix
}
/// Remove the values killed by `next_inst`.
fn remove_kill_values(&mut self, next_inst: Inst) {
let keep = self.live_after(next_inst);
self.values.truncate(keep);
}
/// Remove any dead values.
fn remove_dead_values(&mut self) {
self.values.retain(|v| !v.is_dead);
self.live_prefix = None;
}
}
impl LiveValueTracker {
/// Create a new blank tracker.
pub fn new() -> Self {
Self {
live: LiveValueVec::new(),
idom_sets: HashMap::new(),
idom_pool: ListPool::new(),
}
}
/// Clear all cached information.
pub fn clear(&mut self) {
self.live.clear();
self.idom_sets.clear();
self.idom_pool.clear();
}
/// Get the set of currently live values.
///
/// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
/// defined by the current instruction.
pub fn live(&self) -> &[LiveValue] {
&self.live.values
}
/// Get a mutable set of currently live values.
///
/// Use with care and don't move entries around.
pub fn live_mut(&mut self) -> &mut [LiveValue] {
&mut self.live.values
}
/// Move the current position to the top of `ebb`.
///
/// This depends on the stored live value set at `ebb`'s immediate dominator, so that must have
/// been visited first.
///
/// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
/// from the immediate dominator. The second slice is the set of `ebb` parameters.
///
/// Dead parameters with no uses are included in `args`. Call `drop_dead_params()` to remove them.
pub fn ebb_top(
&mut self,
ebb: Ebb,
dfg: &DataFlowGraph,
liveness: &Liveness,
layout: &Layout,
domtree: &DominatorTree,
) -> (&[LiveValue], &[LiveValue]) {
// Start over, compute the set of live values at the top of the EBB from two sources:
//
// 1. Values that were live before `ebb`'s immediate dominator, filtered for those that are
// actually live-in.
// 2. Arguments to `ebb` that are not dead.
//
self.live.clear();
// Compute the live-in values. Start by filtering the set of values that were live before
// the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
// the entry block or an unreachable block).
if let Some(idom) = domtree.idom(ebb) {
// If the immediate dominator exists, we must have a stored list for it. This is a
// requirement on the order EBBs are visited: all dominators must have been processed
// before the current EBB.
let idom_live_list = self.idom_sets.get(&idom).expect(
"No stored live set for dominator",
);
let ctx = liveness.context(layout);
// Get just the values that are live-in to `ebb`.
for &value in idom_live_list.as_slice(&self.idom_pool) {
let lr = liveness.get(value).expect(
"Immediate dominator value has no live range",
);
// Check if this value is live-in here.
if let Some(endpoint) = lr.livein_local_end(ebb, ctx) {
self.live.push(value, endpoint, lr);
}
}
}
// Now add all the live parameters to `ebb`.
let first_arg = self.live.values.len();
for &value in dfg.ebb_params(ebb) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), ebb.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Ebb(local_ebb) => {
// This is a dead EBB parameter which is not even live into the first
// instruction in the EBB.
debug_assert_eq!(
local_ebb,
ebb,
"EBB parameter live range ends at wrong EBB header"
);
// Give this value a fake endpoint that is the first instruction in the EBB.
// We expect it to be removed by calling `drop_dead_params()`.
self.live.push(
value,
layout.first_inst(ebb).expect("Empty EBB"),
lr,
);
}
}
}
self.live.values.split_at(first_arg)
}
/// Prepare to move past `inst`.
///
/// Determine the set of already live values that are killed by `inst`, and add the new defined
/// values to the tracked set.
///
/// Returns `(throughs, kills, defs)` as a tuple of slices:
///
/// 1. The `throughs` slice is the set of live-through values that are neither defined nor
/// killed by the instruction.
/// 2. The `kills` slice is the set of values that were live before the instruction and are
/// killed at the instruction. This does not include dead defs.
/// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
/// dead defines.
///
/// The order of `throughs` and `kills` is arbitrary.
///
/// The `drop_dead()` method must be called next to actually remove the dead values from the
/// tracked set after the two returned slices are no longer needed.
pub fn process_inst(
&mut self,
inst: Inst,
dfg: &DataFlowGraph,
liveness: &Liveness,
) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
// Save a copy of the live values before any branches or jumps that could be somebody's
// immediate dominator.
if dfg[inst].opcode().is_branch() {
self.save_idom_live_set(inst);
}
// Move killed values to the end of the vector.
// Don't remove them yet, `drop_dead()` will do that.
let first_kill = self.live.live_after(inst);
// Add the values defined by `inst`.
let first_def = self.live.values.len();
for &value in dfg.inst_results(inst) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), inst.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Ebb(ebb) => {
panic!("Instruction result live range can't end at {}", ebb);
}
}
}
(
&self.live.values[0..first_kill],
&self.live.values[first_kill..first_def],
&self.live.values[first_def..],
)
}
/// Prepare to move past a ghost instruction.
///
/// This is like `process_inst`, except any defs are ignored.
///
/// Returns `(throughs, kills)`.
pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
let first_kill = self.live.live_after(inst);
self.live.values.as_slice().split_at(first_kill)
}
/// Drop the values that are now dead after moving past `inst`.
///
/// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
///
/// This must be called after `process_inst(inst)` and before proceeding to the next
/// instruction.
pub fn drop_dead(&mut self, inst: Inst) {
// Remove both live values that were killed by `inst` and dead defines from `inst`.
self.live.remove_kill_values(inst);
}
/// Drop any values that are marked as `is_dead`.
///
/// Use this after calling `ebb_top` to clean out dead EBB parameters.
pub fn drop_dead_params(&mut self) {
self.live.remove_dead_values();
}
/// Process new spills.
///
/// Any values where `f` returns true are spilled and will be treated as if their affinity was
/// `Stack`.
pub fn process_spills<F>(&mut self, mut f: F)
where
F: FnMut(Value) -> bool,
{
for lv in &mut self.live.values {
if f(lv.value) {
lv.affinity = Affinity::Stack;
}
}
}
/// Save the current set of live values so it is associated with `idom`.
fn save_idom_live_set(&mut self, idom: Inst) {
let values = self.live.values.iter().map(|lv| lv.value);
let pool = &mut self.idom_pool;
// If there already is a set saved for `idom`, just keep it.
self.idom_sets.entry(idom).or_insert_with(|| {
let mut list = ValueList::default();
list.extend(values, pool);
list
});
}
}
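
The tracker is driven by a fairly strict per-EBB protocol. Here is a sketch of how one EBB is walked (a hypothetical pass body, not part of this commit; it assumes an extra `use ir::Function;` on top of the imports above):

fn walk_ebb(
    ebb: Ebb,
    func: &Function,
    liveness: &Liveness,
    domtree: &DominatorTree,
    tracker: &mut LiveValueTracker,
) {
    {
        // Establish the live-in set and the EBB parameters at the header.
        let (_liveins, _params) =
            tracker.ebb_top(ebb, &func.dfg, liveness, &func.layout, domtree);
        // A real pass would inspect the returned slices here.
    }
    // Dead parameters were given a fake endpoint above; discard them now.
    tracker.drop_dead_params();

    for inst in func.layout.ebb_insts(ebb) {
        {
            // Partition the live set into throughs/kills and append the new defs.
            let (_throughs, _kills, _defs) =
                tracker.process_inst(inst, &func.dfg, liveness);
            // A real pass would assign registers or insert spill code here.
        }
        // Remove values killed by `inst` and its dead defs before moving on.
        tracker.drop_dead(inst);
    }
}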


@@ -0,0 +1,458 @@
//! Liveness analysis for SSA values.
//!
//! This module computes the live range of all the SSA values in a function and produces a
//! `LiveRange` instance for each.
//!
//!
//! # Liveness consumers
//!
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
//! EBB and assigns a register to the defined values. This algorithm needs to maintain a set of the
//! currently live values as it is iterating down the instructions in the EBB. It asks the
//! following questions:
//!
//! - What is the set of live values at the entry to the EBB?
//! - When moving past a use of a value, is that value still alive in the EBB, or was that the last
//! use?
//! - When moving past a branch, which of the live values are still live below the branch?
//!
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
//! `livein_local_end` queries. The coloring algorithm visits EBBs in a topological order of the
//! dominator tree, so it can compute the set of live values at the beginning of an EBB by starting
//! from the set of live values at the dominating branch instruction and filtering it with
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
//!
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
//! number of live values at every program point and insert spill code until the number of
//! registers needed is small enough.
//!
//!
//! # Alternative algorithms
//!
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
//! alternatives.
//!
//! ## Data-flow equations
//!
//! The classic *live variables analysis* that you will find in all compiler books from the
//! previous century does not depend on SSA form. It is typically implemented by iteratively
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
//! variables for every basic block in the program.
//!
//! This algorithm has some disadvantages that makes us look elsewhere:
//!
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
//! - Sparse representation. In practice, the majority of SSA values never leave their basic block,
//! and those that do span basic blocks rarely span a large number of basic blocks. This makes
//! the bit-vectors quite sparse.
//! - Traditionally, the data-flow equations were solved for real program *variables* which does
//! not include temporaries used in evaluating expressions. We have an SSA form program which
//! blurs the distinction between temporaries and variables. This makes the quadratic memory
//!   problem worse because there are many more SSA values than there were variables in the original
//! program, and we don't know a priori which SSA values leave their basic block.
//! - Missing last-use information. For values that are not live-out of a basic block, we would
//! need to store information about the last use in the block somewhere. LLVM stores this
//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
//! source of problems for LLVM's register allocator.
//!
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
//! multiple definitions of the same variable. We don't need this generality since we already have
//! a program in SSA form.
//!
//! ## LLVM's liveness analysis
//!
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
//! a disjoint union of related SSA values that should be assigned to the same physical register.
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
//! that Cretonne's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
//! describes the live range of a virtual register *and* which one of the related SSA values is
//! live at any given program point.
//!
//! LLVM computes the live range of each virtual register independently by using the use-def chains
//! that are baked into its IR. The algorithm for a single virtual register is:
//!
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
//! the def-chain. This does not include any phi-values.
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
//! that already contain some liveness and extend the last live SSA value in the block to be
//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
//! PHI values to be created when different SSA values can reach the same block.
//!
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
//! one SSA value.
//!
//! This algorithm has some advantages compared to the data-flow equations:
//!
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
//! the CFG. The memory needed to store these live ranges is independent of the number of basic
//! blocks in the program.
//! - The time to compute the live range of a global virtual register is proportional to the number
//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
//! functions.
//! - A single live range can be recomputed after making modifications to the IR. No global
//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
//! which Cretonne doesn't.
//!
//! Cretonne uses data structures and algorithms very similar to LLVM's, with the important
//! difference that live ranges are computed per SSA value instead of per virtual register, and the
//! uses in Cretonne IR refer to SSA values instead of virtual registers. This means that Cretonne
//! can skip the last step of reconstructing SSA form for the virtual register uses.
//!
//! ## Fast Liveness Checking for SSA-Form Programs
//!
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
//! was presented at CGO 2008:
//!
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
//! Checking for SSA-Form Programs.* CGO.
//!
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
//! chain of the value and performs lookups in the precomputed bit-vectors.
//!
//! I did not seriously consider this analysis for Cretonne because:
//!
//! - It depends critically on use chains which Cretonne doesn't have.
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
//! quadratic behavior in unfortunate cases.
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
//! based approach, which isn't that impressive.
//!
//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cretonne
//! gains use chains, this approach would be worth a proper evaluation.
//!
//!
//! # Cretonne's liveness analysis
//!
//! The algorithm implemented in this module is similar to LLVM's with these differences:
//!
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
//! register.
//! - Instructions in Cretonne IR contains references to SSA values, not virtual registers.
//! - All live ranges are computed in one traversal of the program. Cretonne doesn't have use
//! chains, so it is not possible to compute the live range for a single SSA value independently.
//!
//! The liveness computation visits all instructions in the program. The order is not important for
//! the algorithm to be correct. At each instruction, the used values are examined.
//!
//! - The first time a value is encountered, its live range is constructed as a dead live range
//! containing only the defining program point.
//! - The local interval of the value's live range is extended so it reaches the use. This may
//! require creating a new live-in local interval for the EBB.
//! - If the live range became live-in to the EBB, add the EBB to a work-list.
//! - While the work-list is non-empty pop a live-in EBB and repeat the two steps above, using each
//! of the live-in EBB's CFG predecessor instructions as a 'use'.
//!
//! The effect of this algorithm is to extend the live range of each value to reach uses as they are
//! visited. No data about each value beyond the live range is needed between visiting uses, so
//! nothing is lost by computing the live range of all values simultaneously.
//!
//! ## Cache efficiency of Cretonne vs LLVM
//!
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
//! somewhat chaotically.
//!
//! Cretonne uses a transposed algorithm, visiting instructions in order. This means that each
//! instruction is brought into cache only once, and it is likely that the other instructions on
//! the same cache line will be visited before the line is evicted.
//!
//! Cretonne's problem is that the `LiveRange` structs are visited many times and not always
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
//! multiple related values can live on the same cache line.
//!
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
//! size to 32 bytes.
//! - Related values should be stored on the same cache line. The current sparse set implementation
//! does a decent job of that.
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
//! line. These lists are very likely to be found in L2 cache at least.
//!
//! There is some room for improvement.
use entity::SparseMap;
use flowgraph::ControlFlowGraph;
use ir::dfg::ValueDef;
use ir::{Ebb, Function, Inst, Layout, ProgramPoint, Value};
use isa::{EncInfo, TargetIsa};
use regalloc::affinity::Affinity;
use regalloc::liverange::{LiveRange, LiveRangeContext, LiveRangeForest};
use std::mem;
use std::ops::Index;
use std::vec::Vec;
use timing;
/// A set of live ranges, indexed by value number.
type LiveRangeSet = SparseMap<Value, LiveRange>;
/// Get a mutable reference to the live range for `value`.
/// Create it if necessary.
fn get_or_create<'a>(
lrset: &'a mut LiveRangeSet,
value: Value,
isa: &TargetIsa,
func: &Function,
enc_info: &EncInfo,
) -> &'a mut LiveRange {
// It would be better to use `get_mut()` here, but that leads to borrow checker fighting
// which can probably only be resolved by non-lexical lifetimes.
// https://github.com/rust-lang/rfcs/issues/811
if lrset.get(value).is_none() {
// Create a live range for value. We need the program point that defines it.
let def;
let affinity;
match func.dfg.value_def(value) {
ValueDef::Result(inst, rnum) => {
def = inst.into();
// Initialize the affinity from the defining instruction's result constraints.
// Don't do this for call return values which are always tied to a single register.
affinity = enc_info
.operand_constraints(func.encodings[inst])
.and_then(|rc| rc.outs.get(rnum))
.map(Affinity::new)
.or_else(|| {
// If this is a call, get the return value affinity.
func.dfg.call_signature(inst).map(|sig| {
Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa)
})
})
.unwrap_or_default();
}
ValueDef::Param(ebb, num) => {
def = ebb.into();
if func.layout.entry_block() == Some(ebb) {
// The affinity for entry block parameters can be inferred from the function
// signature.
affinity = Affinity::abi(&func.signature.params[num], isa);
} else {
// Give normal EBB parameters a register affinity matching their type.
let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
affinity = Affinity::Reg(rc.into());
}
}
};
lrset.insert(LiveRange::new(value, def, affinity));
}
lrset.get_mut(value).unwrap()
}
/// Extend the live range for `value` so it reaches `to` which must live in `ebb`.
fn extend_to_use(
lr: &mut LiveRange,
ebb: Ebb,
to: Inst,
worklist: &mut Vec<Ebb>,
func: &Function,
cfg: &ControlFlowGraph,
forest: &mut LiveRangeForest,
) {
// This is our scratch working space, and we'll leave it empty when we return.
debug_assert!(worklist.is_empty());
// Extend the range locally in `ebb`.
// If there already was a live interval in that block, we're done.
if lr.extend_in_ebb(ebb, to, &func.layout, forest) {
worklist.push(ebb);
}
// The work list contains those EBBs where we have learned that the value needs to be
// live-in.
//
// This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
// CFG from the existing live range to `ebb`.
//
// Extend the live range as we go. The live range itself also serves as a visited set since
// `extend_in_ebb` will never return true twice for the same EBB.
//
while let Some(livein) = worklist.pop() {
// We've learned that the value needs to be live-in to the `livein` EBB.
// Make sure it is also live at all predecessor branches to `livein`.
for (pred, branch) in cfg.pred_iter(livein) {
if lr.extend_in_ebb(pred, branch, &func.layout, forest) {
// This predecessor EBB also became live-in. We need to process it later.
worklist.push(pred);
}
}
}
}
/// Liveness analysis for a function.
///
/// Compute a live range for every SSA value used in the function.
pub struct Liveness {
/// The live ranges that have been computed so far.
ranges: LiveRangeSet,
/// Memory pool for the live ranges.
forest: LiveRangeForest,
/// Working space for the `extend_to_use` algorithm.
/// This vector is always empty, except for inside that function.
/// It lives here to avoid repeated allocation of scratch memory.
worklist: Vec<Ebb>,
}
impl Liveness {
/// Create a new empty liveness analysis.
///
/// The memory allocated for this analysis can be reused for multiple functions. Use the
/// `compute` method to actually run the analysis for a function.
pub fn new() -> Self {
Self {
ranges: LiveRangeSet::new(),
forest: LiveRangeForest::new(),
worklist: Vec::new(),
}
}
/// Get a context needed for working with a `LiveRange`.
pub fn context<'a>(&'a self, layout: &'a Layout) -> LiveRangeContext<'a, Layout> {
LiveRangeContext::new(layout, &self.forest)
}
/// Clear all data structures in this liveness analysis.
pub fn clear(&mut self) {
self.ranges.clear();
self.forest.clear();
self.worklist.clear();
}
/// Get the live range for `value`, if it exists.
pub fn get(&self, value: Value) -> Option<&LiveRange> {
self.ranges.get(value)
}
/// Create a new live range for `value`.
///
/// The new live range will be defined at `def` with no extent, like a dead value.
///
/// This asserts that `value` does not have an existing live range.
pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
where
PP: Into<ProgramPoint>,
{
let old = self.ranges.insert(
LiveRange::new(value, def.into(), affinity),
);
debug_assert!(old.is_none(), "{} already has a live range", value);
}
/// Move the definition of `value` to `def`.
///
/// The old and new def points must be in the same EBB, and before the end of the live range.
pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
where
PP: Into<ProgramPoint>,
{
let lr = self.ranges.get_mut(value).expect("Value has no live range");
lr.move_def_locally(def.into());
}
/// Locally extend the live range for `value` to reach `user`.
///
/// It is assumed that `value` is already live before `user` in `ebb`.
///
/// Returns a mutable reference to the value's affinity in case that also needs to be updated.
pub fn extend_locally(
&mut self,
value: Value,
ebb: Ebb,
user: Inst,
layout: &Layout,
) -> &mut Affinity {
debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
let lr = self.ranges.get_mut(value).expect("Value has no live range");
let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
debug_assert!(!livein, "{} should already be live in {}", value, ebb);
&mut lr.affinity
}
/// Change the affinity of `value` to `Stack` and return the previous affinity.
pub fn spill(&mut self, value: Value) -> Affinity {
let lr = self.ranges.get_mut(value).expect("Value has no live range");
mem::replace(&mut lr.affinity, Affinity::Stack)
}
/// Compute the live ranges of all SSA values used in `func`.
/// This clears out any existing analysis stored in this data structure.
pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
let _tt = timing::ra_liveness();
self.ranges.clear();
// Get ISA data structures used for computing live range affinities.
let enc_info = isa.encoding_info();
let reg_info = isa.register_info();
// The liveness computation needs to visit all uses, but the order doesn't matter.
// TODO: Perhaps this traversal of the function could be combined with a dead code
// elimination pass if we visit a post-order of the dominator tree?
// TODO: Resolve value aliases while we're visiting instructions?
for ebb in func.layout.ebbs() {
// Make sure we have created live ranges for dead EBB parameters.
// TODO: If these parameters are really dead, we could remove them, except for the
// entry block which must match the function signature.
for &arg in func.dfg.ebb_params(ebb) {
get_or_create(&mut self.ranges, arg, isa, func, &enc_info);
}
for inst in func.layout.ebb_insts(ebb) {
// Eliminate all value aliases, they would confuse the register allocator.
func.dfg.resolve_aliases_in_arguments(inst);
// Make sure we have created live ranges for dead defs.
// TODO: When we implement DCE, we can use the absence of a live range to indicate
// an unused value.
for &def in func.dfg.inst_results(inst) {
get_or_create(&mut self.ranges, def, isa, func, &enc_info);
}
// Iterator of constraints, one per value operand.
let encoding = func.encodings[inst];
let mut operand_constraints = enc_info
.operand_constraints(encoding)
.map(|c| c.ins)
.unwrap_or(&[])
.iter();
for &arg in func.dfg.inst_args(inst) {
// Get the live range, create it as a dead range if necessary.
let lr = get_or_create(&mut self.ranges, arg, isa, func, &enc_info);
// Extend the live range to reach this use.
extend_to_use(
lr,
ebb,
inst,
&mut self.worklist,
func,
cfg,
&mut self.forest,
);
// Apply operand constraint, ignoring any variable arguments after the fixed
// operands described by `operand_constraints`. Variable arguments are either
// EBB arguments or call/return ABI arguments.
if let Some(constraint) = operand_constraints.next() {
lr.affinity.merge(constraint, &reg_info);
}
}
}
}
}
}
impl Index<Value> for Liveness {
type Output = LiveRange;
fn index(&self, index: Value) -> &LiveRange {
match self.ranges.get(index) {
Some(lr) => lr,
None => panic!("{} has no live range", index),
}
}
}
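
As a small consumer-side sketch (hypothetical helper, not part of this commit), this is how a pass would ask whether a value is still live at a given instruction, combining `get` with the `LiveRange` queries and the context built from the layout:

fn is_live_at(
    liveness: &Liveness,
    layout: &Layout,
    value: Value,
    ebb: Ebb,
    user: Inst,
) -> bool {
    match liveness.get(value) {
        // `reaches_use` checks both the def interval and any live-in interval in `ebb`.
        Some(lr) => lr.reaches_use(user, ebb, liveness.context(layout)),
        // No live range: the value was never defined, so it can't be live here.
        None => false,
    }
}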


@@ -0,0 +1,748 @@
//! Data structure representing the live range of an SSA value.
//!
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
//! an SSA value begins where it is defined and extends to all program points where the value is
//! still needed.
//!
//! # Local Live Ranges
//!
//! Inside a single extended basic block, the live range of a value is always an interval between
//! two program points (if the value is live in the EBB at all). The starting point is either:
//!
//! 1. The instruction that defines the value, or
//! 2. The EBB header, because the value is an argument to the EBB, or
//! 3. The EBB header, because the value is defined in another EBB and live-in to this one.
//!
//! The ending point of the local live range is the last of the following program points in the
//! EBB:
//!
//! 1. The last use in the EBB, where a *use* is an instruction that has the value as an argument.
//! 2. The last branch or jump instruction in the EBB that can reach a use.
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
//!
//! Note that 2. includes loop back-edges to the same EBB. In general, if a value is defined
//! outside a loop and used inside the loop, it will be live in the entire loop.
//!
//! # Global Live Ranges
//!
//! Values that appear in more than one EBB have a *global live range* which can be seen as the
//! disjoint union of the per-EBB local intervals for all of the EBBs where the value is live.
//! Together with a `ProgramOrder` which provides a linear ordering of the EBBs, the global live
//! range becomes a linear sequence of disjoint intervals, at most one per EBB.
//!
//! In the special case of a dead value, the global live range is a single interval where the start
//! and end points are the same. The global live range of a value is never completely empty.
//!
//! # Register interference
//!
//! The register allocator uses live ranges to determine if values *interfere*, which means that
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
//! their intervals overlap.
//!
//! If one live range ends at an instruction that defines another live range, those two live ranges
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
//! register for an output value. If Cretonne gets support for inline assembly, we will need to
//! handle *early clobbers* which are output registers that are not allowed to alias any input
//! registers.
//!
//! If `i1 < i2 < i3` are program points, we have:
//!
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
//! - `i1-i2` and `i2-i3` don't interfere.
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
//! - `i1-i2` and `i2-i2` don't interfere.
//! - `i2-i3` and `i2-i2` do interfere.
//!
//! Because of this behavior around interval end points, live range interference is not completely
//! equivalent to mathematical intersection of open or half-open intervals.
//!
//! # Implementation notes
//!
//! A few notes about the implementation of this data structure. This should not concern someone
//! only looking to use the public interface.
//!
//! ## EBB ordering
//!
//! The relative order of EBBs is used to maintain a sorted list of live-in intervals and to
//! coalesce adjacent live-in intervals when the prior interval covers the whole EBB. This doesn't
//! depend on any property of the program order, so alternative orderings are possible:
//!
//! 1. The EBB layout order. This is what we currently use.
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
//! def interval.
//! 3. A numerical order by EBB number. Performant because it doesn't need to indirect through the
//! `ProgramOrder` for comparisons.
//!
//! These orderings will cause small differences in coalescing opportunities, but all of them would
//! do a decent job of compressing a long live range. The numerical order might be preferable
//! because:
//!
//! - It has better performance because EBB numbers can be compared directly without any table
//! lookups.
//! - If EBB numbers are not reused, it is safe to allocate new EBBs without getting spurious
//! live-in intervals from any coalesced representations that happen to cross a new EBB.
//!
//! For comparing instructions, the layout order is always what we want.
//!
//! ## Alternative representation
//!
//! Since a local live-in interval always begins at its EBB header, it is uniquely described by its
//! end point instruction alone. We can use the layout to look up the EBB containing the end point.
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
//!
//! Coalescing is an important compression technique because some live ranges can span thousands of
//! EBBs. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
//! an `[Ebb, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
//! `Ebb` entry represents a single live-in interval.
//!
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
//! It is more complicated to work with, though, so it is probably not worth it. The performance
//! benefits of switching to a numerical EBB order only appear if the binary search is doing
//! EBB-EBB comparisons.
//!
//! ## B-tree representation
//!
//! A `BTreeMap<Ebb, Inst>` could also be used for the live-in intervals. It looks like the
//! standard library B-tree doesn't provide the necessary interface for an efficient implementation
//! of coalescing, so we would need to roll our own.
//!
use bforest;
use entity::SparseMapValue;
use ir::{Ebb, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
use regalloc::affinity::Affinity;
use std::cmp::Ordering;
/// Global live range of a single SSA value.
///
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
/// SSA value is the disjoint union of a set of intervals, each local to a single EBB, and with at
/// most one interval per EBB. We further distinguish between:
///
/// 1. The *def interval* is the local interval in the EBB where the value is defined, and
/// 2. The *live-in intervals* are the local intervals in the remaining EBBs.
///
/// A live-in interval always begins at the EBB header, while the def interval can begin at the
/// defining instruction, or at the EBB header for an EBB argument value.
///
/// All values have a def interval, but a large proportion of values don't have any live-in
/// intervals. These are called *local live ranges*.
///
/// # Program order requirements
///
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
/// ordering instructions inside an EBB *and* for ordering EBBs. The methods that depend on the
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
/// ensure that the provided ordering is consistent between calls.
///
/// In particular, changing the order of EBBs or inserting new EBBs will invalidate live ranges.
///
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
/// instructions using or defining their value, `LiveRange` structs can contain references to
/// branch and jump instructions.
pub type LiveRange = GenLiveRange<Layout>;
/// Generic live range implementation.
///
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
/// Use `LiveRange` instead of using this generic directly.
pub struct GenLiveRange<PO: ProgramOrder> {
/// The value described by this live range.
/// This member can't be modified in case the live range is stored in a `SparseMap`.
value: Value,
/// The preferred register allocation for this value.
pub affinity: Affinity,
/// The instruction or EBB header where this value is defined.
def_begin: ProgramPoint,
/// The end point of the def interval. This must always belong to the same EBB as `def_begin`.
///
/// We always have `def_begin <= def_end` with equality implying a dead def live range with no
/// uses.
def_end: ProgramPoint,
/// Additional live-in intervals sorted in program order.
///
/// This map is empty for most values which are only used in one EBB.
///
/// A map entry `ebb -> inst` means that the live range is live-in to `ebb`, continuing up to
/// `inst` which may belong to a later EBB in the program order.
///
/// The entries are non-overlapping, and none of them overlap the EBB where the value is
/// defined.
liveins: bforest::Map<Ebb, Inst, PO>,
}
/// Context information needed to query a `LiveRange`.
pub struct LiveRangeContext<'a, PO: 'a + ProgramOrder> {
/// Ordering of EBBs.
pub order: &'a PO,
/// Memory pool.
pub forest: &'a bforest::MapForest<Ebb, Inst, PO>,
}
impl<'a, PO: ProgramOrder> LiveRangeContext<'a, PO> {
/// Make a new context.
pub fn new(
order: &'a PO,
forest: &'a bforest::MapForest<Ebb, Inst, PO>,
) -> LiveRangeContext<'a, PO> {
LiveRangeContext { order, forest }
}
}
impl<'a, PO: ProgramOrder> Clone for LiveRangeContext<'a, PO> {
fn clone(&self) -> Self {
LiveRangeContext {
order: self.order,
forest: self.forest,
}
}
}
impl<'a, PO: ProgramOrder> Copy for LiveRangeContext<'a, PO> {}
/// Forest of B-trees used for storing live ranges.
pub type LiveRangeForest = bforest::MapForest<Ebb, Inst, Layout>;
impl<PO: ProgramOrder> bforest::Comparator<Ebb> for PO {
fn cmp(&self, a: Ebb, b: Ebb) -> Ordering {
self.cmp(a, b)
}
}
impl<PO: ProgramOrder> GenLiveRange<PO> {
/// Create a new live range for `value` defined at `def`.
///
/// The live range will be created as dead, but it can be extended with `extend_in_ebb()`.
pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> GenLiveRange<PO> {
GenLiveRange {
value,
affinity,
def_begin: def,
def_end: def,
liveins: bforest::Map::new(),
}
}
/// Extend the local interval for `ebb` so it reaches `to` which must belong to `ebb`.
/// Create a live-in interval if necessary.
///
/// If the live range already has a local interval in `ebb`, extend its end point so it
/// includes `to`, and return false.
///
/// If the live range did not previously have a local interval in `ebb`, add one so the value
/// is live-in to `ebb`, extending to `to`. Return true.
///
/// The return value can be used to detect if we just learned that the value is live-in to
/// `ebb`. This can trigger recursive extensions in `ebb`'s CFG predecessor blocks.
pub fn extend_in_ebb(
&mut self,
ebb: Ebb,
to: Inst,
order: &PO,
forest: &mut bforest::MapForest<Ebb, Inst, PO>,
) -> bool {
// First check if we're extending the def interval.
//
// We're assuming here that `to` never precedes `def_begin` in the same EBB, but we can't
// check it without a method for getting `to`'s EBB.
if order.cmp(ebb, self.def_end) != Ordering::Greater &&
order.cmp(to, self.def_begin) != Ordering::Less
{
let to_pp = to.into();
debug_assert_ne!(
to_pp,
self.def_begin,
"Can't use value in the defining instruction."
);
if order.cmp(to, self.def_end) == Ordering::Greater {
self.def_end = to_pp;
}
return false;
}
// Now check if we're extending any of the existing live-in intervals.
let mut c = self.liveins.cursor(forest, order);
let first_time_livein;
if let Some(end) = c.goto(ebb) {
// There's an interval beginning at `ebb`. See if it extends.
first_time_livein = false;
if order.cmp(end, to) == Ordering::Less {
*c.value_mut().unwrap() = to;
} else {
return first_time_livein;
}
} else if let Some((_, end)) = c.prev() {
// There's no interval beginning at `ebb`, but we could still be live-in at `ebb` with
// a coalesced interval that begins before and ends after.
if order.cmp(end, ebb) == Ordering::Greater {
// Yep, the previous interval overlaps `ebb`.
first_time_livein = false;
if order.cmp(end, to) == Ordering::Less {
*c.value_mut().unwrap() = to;
} else {
return first_time_livein;
}
} else {
first_time_livein = true;
// The current interval does not overlap `ebb`, but it may still be possible to
// coalesce with it.
if order.is_ebb_gap(end, ebb) {
*c.value_mut().unwrap() = to;
} else {
c.insert(ebb, to);
}
}
} else {
// There is no existing interval before `ebb`.
first_time_livein = true;
c.insert(ebb, to);
}
// Now `c` is left pointing at an interval that ends in `to`.
debug_assert_eq!(c.value(), Some(to));
// See if it can be coalesced with the following interval.
if let Some((next_ebb, next_end)) = c.next() {
if order.is_ebb_gap(to, next_ebb) {
// Remove this interval and extend the previous end point to `next_end`.
c.remove();
c.prev();
*c.value_mut().unwrap() = next_end;
}
}
first_time_livein
}
/// Is this the live range of a dead value?
///
/// A dead value has no uses, and its live range ends at the same program point where it is
/// defined.
pub fn is_dead(&self) -> bool {
self.def_begin == self.def_end
}
/// Is this a local live range?
///
/// A local live range is only used in the same EBB where it was defined. It is allowed to span
/// multiple basic blocks within that EBB.
pub fn is_local(&self) -> bool {
self.liveins.is_empty()
}
/// Get the program point where this live range is defined.
///
/// This will be an EBB header when the value is an EBB argument, otherwise it is the defining
/// instruction.
pub fn def(&self) -> ProgramPoint {
self.def_begin
}
/// Move the definition of this value to a new program point.
///
/// It is only valid to move the definition within the same EBB, and it can't be moved beyond
/// `def_local_end()`.
pub fn move_def_locally(&mut self, def: ProgramPoint) {
self.def_begin = def;
}
/// Get the local end-point of this live range in the EBB where it is defined.
///
/// This can be the EBB header itself in the case of a dead EBB argument.
/// Otherwise, it will be the last local use or branch/jump that can reach a use.
pub fn def_local_end(&self) -> ProgramPoint {
self.def_end
}
/// Get the local end-point of this live range in an EBB where it is live-in.
///
/// If this live range is not live-in to `ebb`, return `None`. Otherwise, return the end-point
/// of this live range's local interval in `ebb`.
///
/// If the live range is live through all of `ebb`, the terminator of `ebb` is a correct
/// answer, but it is also possible that an even later program point is returned. So don't
/// depend on the returned `Inst` to belong to `ebb`.
pub fn livein_local_end(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> Option<Inst> {
self.liveins
.get_or_less(ebb, ctx.forest, ctx.order)
.and_then(|(_, inst)| {
// We have an entry that ends at `inst`.
if ctx.order.cmp(inst, ebb) == Ordering::Greater {
Some(inst)
} else {
None
}
})
}
/// Is this value live-in to `ebb`?
///
/// An EBB argument is not considered to be live in.
pub fn is_livein(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
self.livein_local_end(ebb, ctx).is_some()
}
/// Get all the live-in intervals.
///
/// Note that the intervals are stored in a compressed form so each entry may span multiple
/// EBBs where the value is live in.
pub fn liveins<'a>(
&'a self,
ctx: LiveRangeContext<'a, PO>,
) -> bforest::MapIter<'a, Ebb, Inst, PO> {
self.liveins.iter(ctx.forest)
}
/// Check if this live range overlaps a definition in `ebb`.
pub fn overlaps_def(
&self,
def: ExpandedProgramPoint,
ebb: Ebb,
ctx: LiveRangeContext<PO>,
) -> bool {
// Two defs at the same program point always overlap, even if one is dead.
if def == self.def_begin.into() {
return true;
}
// Check for an overlap with the local range.
if ctx.order.cmp(def, self.def_begin) != Ordering::Less &&
ctx.order.cmp(def, self.def_end) == Ordering::Less
{
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(ebb, ctx) {
Some(inst) => ctx.order.cmp(def, inst) == Ordering::Less,
None => false,
}
}
/// Check if this live range reaches a use at `user` in `ebb`.
pub fn reaches_use(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
// Check for an overlap with the local range.
if ctx.order.cmp(user, self.def_begin) == Ordering::Greater &&
ctx.order.cmp(user, self.def_end) != Ordering::Greater
{
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(ebb, ctx) {
Some(inst) => ctx.order.cmp(user, inst) != Ordering::Greater,
None => false,
}
}
/// Check if this live range is killed at `user` in `ebb`.
pub fn killed_at(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
self.def_local_end() == user.into() || self.livein_local_end(ebb, ctx) == Some(user)
}
}
/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
impl<PO: ProgramOrder> SparseMapValue<Value> for GenLiveRange<PO> {
fn key(&self) -> Value {
self.value
}
}
#[cfg(test)]
mod tests {
use super::{GenLiveRange, LiveRangeContext};
use bforest;
use entity::EntityRef;
use ir::{Ebb, Inst, Value};
use ir::{ExpandedProgramPoint, ProgramOrder};
use std::cmp::Ordering;
use std::vec::Vec;
// Dummy program order which simply compares indexes.
// It is assumed that EBBs have indexes that are multiples of 10, and instructions have indexes
// in between. `is_ebb_gap` assumes that terminator instructions have indexes of the form
// ebb * 10 + 1. This is used in the coalesce test.
struct ProgOrder {}
impl ProgramOrder for ProgOrder {
fn cmp<A, B>(&self, a: A, b: B) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
fn idx(pp: ExpandedProgramPoint) -> usize {
match pp {
ExpandedProgramPoint::Inst(i) => i.index(),
ExpandedProgramPoint::Ebb(e) => e.index(),
}
}
let ia = idx(a.into());
let ib = idx(b.into());
ia.cmp(&ib)
}
fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool {
inst.index() % 10 == 1 && ebb.index() / 10 == inst.index() / 10 + 1
}
}
impl ProgOrder {
// Get the EBB corresponding to `inst`.
fn inst_ebb(&self, inst: Inst) -> Ebb {
let i = inst.index();
Ebb::new(i - i % 10)
}
// Get the EBB of a program point.
fn pp_ebb<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Ebb {
match pp.into() {
ExpandedProgramPoint::Inst(i) => self.inst_ebb(i),
ExpandedProgramPoint::Ebb(e) => e,
}
}
// Validate the live range invariants.
fn validate(
&self,
lr: &GenLiveRange<ProgOrder>,
forest: &bforest::MapForest<Ebb, Inst, ProgOrder>,
) {
// The def interval must cover a single EBB.
let def_ebb = self.pp_ebb(lr.def_begin);
assert_eq!(def_ebb, self.pp_ebb(lr.def_end));
// Check that the def interval isn't backwards.
match self.cmp(lr.def_begin, lr.def_end) {
Ordering::Equal => assert!(lr.liveins.is_empty()),
Ordering::Greater => {
panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
}
Ordering::Less => {}
}
// Check the live-in intervals.
let mut prev_end = None;
for (begin, end) in lr.liveins.iter(forest) {
assert_eq!(self.cmp(begin, end), Ordering::Less);
if let Some(e) = prev_end {
assert_eq!(self.cmp(e, begin), Ordering::Less);
}
assert!(
self.cmp(lr.def_end, begin) == Ordering::Less ||
self.cmp(lr.def_begin, end) == Ordering::Greater,
"Interval can't overlap the def EBB"
);
// Save for next round.
prev_end = Some(end);
}
}
}
// Singleton `ProgramOrder` for tests below.
const PO: &'static ProgOrder = &ProgOrder {};
#[test]
fn dead_def_range() {
let v0 = Value::new(0);
let e0 = Ebb::new(0);
let i1 = Inst::new(1);
let i2 = Inst::new(2);
let e2 = Ebb::new(2);
let lr = GenLiveRange::new(v0, i1.into(), Default::default());
let forest = &bforest::MapForest::new();
let ctx = LiveRangeContext::new(PO, forest);
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i1.into());
assert_eq!(lr.def_local_end(), i1.into());
assert_eq!(lr.livein_local_end(e2, ctx), None);
PO.validate(&lr, ctx.forest);
// A dead live range overlaps its own def program point.
assert!(lr.overlaps_def(i1.into(), e0, ctx));
assert!(!lr.overlaps_def(i2.into(), e0, ctx));
assert!(!lr.overlaps_def(e0.into(), e0, ctx));
}
#[test]
fn dead_arg_range() {
let v0 = Value::new(0);
let e2 = Ebb::new(2);
let lr = GenLiveRange::new(v0, e2.into(), Default::default());
let forest = &bforest::MapForest::new();
let ctx = LiveRangeContext::new(PO, forest);
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e2.into());
assert_eq!(lr.def_local_end(), e2.into());
// The def interval of an EBB argument does not count as live-in.
assert_eq!(lr.livein_local_end(e2, ctx), None);
PO.validate(&lr, ctx.forest);
}
#[test]
fn local_def() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
PO.validate(&lr, forest);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn local_arg() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenLiveRange::new(v0, e10.into(), Default::default());
let forest = &mut bforest::MapForest::new();
// Extending a dead EBB argument in its own block should not indicate that a live-in
// interval was created.
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
PO.validate(&lr, forest);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_ebb(e10, i11, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending further.
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn global_def() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let e20 = Ebb::new(20);
let i21 = Inst::new(21);
let i22 = Inst::new(22);
let i23 = Inst::new(23);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
// Adding a live-in interval.
assert_eq!(lr.extend_in_ebb(e20, i22, PO, forest), true);
PO.validate(&lr, forest);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i22)
);
// Extending to a point the live-in interval already covers changes nothing.
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), false);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i22)
);
// Extending the existing live-in.
assert_eq!(lr.extend_in_ebb(e20, i23, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i23)
);
}
#[test]
fn coalesce() {
let v0 = Value::new(0);
let i11 = Inst::new(11);
let e20 = Ebb::new(20);
let i21 = Inst::new(21);
let e30 = Ebb::new(30);
let i31 = Inst::new(31);
let e40 = Ebb::new(40);
let i41 = Inst::new(41);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e30, i31)]
);
// Coalesce to previous
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e30, i41)]
);
// Coalesce to next
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i41)]
);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e40, i41)]
);
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i21), (e40, i41)]
);
// Coalesce to previous and next
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i41)]
);
}
// TODO: Add more tests that exercise the binary search algorithm.
}

@@ -0,0 +1,23 @@
//! Register allocation.
//!
//! This module contains data structures and algorithms used for register allocation.
pub mod register_set;
pub mod coloring;
pub mod live_value_tracker;
pub mod liveness;
pub mod liverange;
pub mod virtregs;
mod affinity;
mod coalescing;
mod context;
mod diversion;
mod pressure;
mod reload;
mod solver;
mod spilling;
pub use self::register_set::RegisterSet;
pub use self::context::Context;
pub use self::diversion::RegDiversions;

@@ -0,0 +1,377 @@
//! Register pressure tracking.
//!
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
//! sufficiently". This module defines the data structures needed to measure register pressure
//! accurately enough to guarantee that the coloring phase will not run out of registers.
//!
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
//! any given program point. This simplistic method has two problems:
//!
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
//! register banks, so we need to at least count the number of live registers in each register
//! bank separately.
//!
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
//! This makes it difficult to accurately measure register pressure.
//!
//! This module deals with the problems via *register banks* and *top-level register classes*.
//! Register classes in different register banks are completely independent, so we can count
//! registers in one bank without worrying about the other bank at all.
//!
//! All register classes have a unique top-level register class, and we will count registers for
//! each top-level register class individually. However, a register bank can have multiple
//! top-level register classes that interfere with each other, so all top-level counts need to
//! be considered when determining how many more registers can be allocated.
//!
//! Currently, the only register bank with multiple top-level registers is the `arm32`
//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
//!
//! # Base and transient counts
//!
//! We maintain two separate register counts per top-level register class: base counts and
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
//! transient counts are adjusted with `take_transient`, then either committed to the base counts
//! with `preserve_transient` or discarded with `reset_transient`.
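//!
//! A minimal usage sketch (the `spill_something_in` helper, `reginfo`, `usable_regs`, and `gpr`
//! are placeholders; the real spilling pass picks candidates from its live value tracker):
//!
//! ```ignore
//! let mut pressure = Pressure::new(&reginfo, &usable_regs);
//! pressure.take(gpr); // a register def that stays live: bump the base count
//! while let Err(mask) = pressure.take_transient(gpr) {
//!     // `mask` names the interfering top-level classes; ease pressure in one of them.
//!     spill_something_in(mask);
//! }
//! // Commit the transient registers, or call `reset_transient()` to discard them instead.
//! pressure.preserve_transient();
//! ```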
// Remove once we're using the pressure tracker.
#![allow(dead_code)]
use isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
use regalloc::RegisterSet;
use std::cmp::min;
use std::fmt;
use std::iter::ExactSizeIterator;
/// Information per top-level register class.
///
/// Everything but the counts is static information computed from the constructor arguments.
#[derive(Default)]
struct TopRC {
// Number of registers currently used from this register class.
base_count: u32,
transient_count: u32,
// Max number of registers that can be allocated.
limit: u32,
// Register units per register.
width: u8,
// The first aliasing top-level RC.
first_toprc: u8,
// The number of aliasing top-level RCs.
num_toprcs: u8,
}
impl TopRC {
fn total_count(&self) -> u32 {
self.base_count + self.transient_count
}
}
pub struct Pressure {
// Bit mask of top-level register classes that are aliased by other top-level register classes.
// Unaliased register classes can use a simpler interference algorithm.
aliased: RegClassMask,
// Current register counts per top-level register class.
toprc: [TopRC; MAX_TRACKED_TOPRCS],
}
impl Pressure {
/// Create a new register pressure tracker.
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Pressure {
let mut p = Pressure {
aliased: 0,
toprc: Default::default(),
};
// Get the layout of aliasing top-level register classes from the register banks.
for bank in reginfo.banks.iter() {
let first = bank.first_toprc;
let num = bank.num_toprcs;
if bank.pressure_tracking {
for rc in &mut p.toprc[first..first + num] {
rc.first_toprc = first as u8;
rc.num_toprcs = num as u8;
}
// Flag the top-level register classes with aliases.
if num > 1 {
p.aliased |= ((1 << num) - 1) << first;
}
} else {
// This bank has no pressure tracking, so its top-level register classes may exceed
// `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOPRCS)] {
// These aren't used if we don't set the `aliased` bit.
rc.first_toprc = !0;
rc.limit = !0;
}
}
}
// Compute per-class limits from `usable`.
for (toprc, rc) in p.toprc.iter_mut().take_while(|t| t.num_toprcs > 0).zip(
reginfo.classes,
)
{
toprc.limit = usable.iter(rc).len() as u32;
toprc.width = rc.width;
}
p
}
/// Check for an available register in the register class `rc`.
///
/// If it is possible to allocate one more register from `rc`'s top-level register class,
/// returns 0.
///
/// If not, returns a bit-mask of top-level register classes that are interfering. Register
/// pressure should be eased in one of the returned top-level register classes before calling
/// `check_avail()` to check again.
fn check_avail(&self, rc: RegClass) -> RegClassMask {
let entry = match self.toprc.get(rc.toprc as usize) {
None => return 0, // Not a pressure tracked bank.
Some(e) => e,
};
let mask = 1 << rc.toprc;
if (self.aliased & mask) == 0 {
// This is a simple unaliased top-level register class.
if entry.total_count() < entry.limit {
0
} else {
mask
}
} else {
// This is the more complicated case. The top-level register class has aliases.
self.check_avail_aliased(entry)
}
}
/// Check for an available register in a top-level register class that may have aliases.
///
/// This is the out-of-line slow path for `check_avail()`.
fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
let first = usize::from(entry.first_toprc);
let num = usize::from(entry.num_toprcs);
let width = u32::from(entry.width);
let ulimit = entry.limit * width;
// Count up the number of available register units.
let mut units = 0;
for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
let rcw = u32::from(rc.width);
// If `rc.width` is smaller than `width`, each register in `rc` could potentially block
// one of ours. This is assuming that none of the smaller registers are straddling the
// bigger ones.
//
// If `rc.width` is larger than `width`, we are also assuming that the registers are
// aligned and `rc.width` is a multiple of `width`.
let u = if rcw < width {
// We can't take more than the total number of register units in the class.
// This matters for arm32 S-registers which can only ever lock out 16 D-registers.
min(rc.total_count() * width, rc.limit * rcw)
} else {
rc.total_count() * rcw
};
// If this top-level RC on its own is responsible for exceeding our limit, return it
// early to guarantee that registers here are spilled before spilling other registers
// unnecessarily.
if u >= ulimit {
return 1 << rci;
}
units += u;
}
// We've counted up the worst-case number of register units claimed by all aliasing
// classes. Compare to the unit limit in this class.
if units < ulimit {
0
} else {
// Registers need to be spilled from any one of the aliasing classes.
((1 << num) - 1) << first
}
}
/// Take a register from `rc`.
///
/// This does not check if there are enough registers available.
pub fn take(&mut self, rc: RegClass) {
self.toprc.get_mut(rc.toprc as usize).map(
|t| t.base_count += 1,
);
}
/// Free a register in `rc`.
pub fn free(&mut self, rc: RegClass) {
self.toprc.get_mut(rc.toprc as usize).map(
|t| t.base_count -= 1,
);
}
/// Reset all counts to 0, both base and transient.
pub fn reset(&mut self) {
for e in &mut self.toprc {
e.base_count = 0;
e.transient_count = 0;
}
}
/// Try to increment a transient counter.
///
/// This will fail if there are not enough registers available.
pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
let mask = self.check_avail(rc);
if mask == 0 {
self.toprc.get_mut(rc.toprc as usize).map(|t| {
t.transient_count += 1
});
Ok(())
} else {
Err(mask)
}
}
/// Reset all transient counts to 0.
pub fn reset_transient(&mut self) {
for e in &mut self.toprc {
e.transient_count = 0;
}
}
/// Preserve the transient counts by transferring them to the base counts.
pub fn preserve_transient(&mut self) {
for e in &mut self.toprc {
e.base_count += e.transient_count;
e.transient_count = 0;
}
}
}
impl fmt::Display for Pressure {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Pressure[")?;
for rc in &self.toprc {
if rc.limit > 0 && rc.limit < !0 {
write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
}
}
write!(f, " ]")
}
}
#[cfg(test)]
#[cfg(build_arm32)]
mod tests {
use super::Pressure;
use isa::{RegClass, TargetIsa};
use regalloc::RegisterSet;
use std::borrow::Borrow;
use std::boxed::Box;
// Make an arm32 `TargetIsa`, if possible.
fn arm32() -> Option<Box<TargetIsa>> {
use isa;
use settings;
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(&shared_builder);
isa::lookup("arm32").ok().map(|b| b.finish(shared_flags))
}
// Get a register class by name.
fn rc_by_name(isa: &TargetIsa, name: &str) -> RegClass {
isa.register_info()
.classes
.iter()
.find(|rc| rc.name == name)
.expect("Can't find named register class.")
}
#[test]
fn basic_counting() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let gpr = rc_by_name(isa, "GPR");
let s = rc_by_name(isa, "S");
let reginfo = isa.register_info();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
let mut count = 0;
while pressure.check_avail(gpr) == 0 {
pressure.take(gpr);
count += 1;
}
assert_eq!(count, 16);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.free(gpr);
assert_eq!(pressure.check_avail(gpr), 0);
pressure.take(gpr);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.reset();
assert_eq!(pressure.check_avail(gpr), 0);
assert_eq!(pressure.check_avail(s), 0);
}
#[test]
fn arm_float_bank() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let s = rc_by_name(isa, "S");
let d = rc_by_name(isa, "D");
let q = rc_by_name(isa, "Q");
let reginfo = isa.register_info();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Allocating a single S-register should not affect availability.
pressure.take(s);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(d);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Take a total of 16 S-regs.
for _ in 1..16 {
pressure.take(s);
}
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
for _ in 0..6 {
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
}
// We've taken 16 S, 1 D, and 7 Qs.
assert!(pressure.check_avail(s) != 0);
assert_eq!(pressure.check_avail(d), 0);
assert!(pressure.check_avail(q) != 0);
}
}

@@ -0,0 +1,321 @@
//! Set of allocatable registers as a bit vector of register units.
//!
//! While allocating registers, we need to keep track of which registers are available and which
//! registers are in use. Since registers can alias in different ways, we track this via the
//! "register unit" abstraction. Every register contains one or more register units. Registers that
//! share a register unit can't be in use at the same time.
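//!
//! A brief usage sketch (`rc` is a placeholder register class; real code obtains it from the
//! target ISA's `RegInfo`):
//!
//! ```ignore
//! let mut regs = RegisterSet::new();
//! if let Some(reg) = regs.iter(rc).next() {
//!     regs.take(rc, reg); // all of the register's units become unavailable
//!     // ... the register is in use ...
//!     regs.free(rc, reg); // give the units back
//! }
//! ```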
use isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
use std::char;
use std::fmt;
use std::iter::ExactSizeIterator;
use std::mem::size_of_val;
/// Set of registers available for allocation.
#[derive(Clone)]
pub struct RegisterSet {
avail: RegUnitMask,
}
// Given a register class and a register unit in the class, compute a word index and a bit mask of
// register units representing that register.
//
// Note that a register is not allowed to straddle words.
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
// Bit mask representing the register. It is `rc.width` consecutive units.
let width_bits = (1 << rc.width) - 1;
// Index into avail[] of the word containing `reg`.
let word_index = (reg / 32) as usize;
// The actual bits in the word that cover `reg`.
let reg_bits = width_bits << (reg % 32);
(word_index, reg_bits)
}
impl RegisterSet {
/// Create a new register set with all registers available.
///
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
/// allocatable registers where reserved registers have been filtered out.
pub fn new() -> Self {
Self { avail: [!0; 3] }
}
/// Create a new register set with no registers available.
pub fn empty() -> Self {
Self { avail: [0; 3] }
}
/// Returns `true` if the specified register is available.
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
let (idx, bits) = bitmask(rc, reg);
(self.avail[idx] & bits) == bits
}
/// Allocate `reg` from `rc` so it is no longer available.
///
/// It is an error to take a register that doesn't have all of its register units available.
pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == bits,
"{}:{} not available in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] &= !bits;
}
/// Return `reg` and all of its register units to the set of available registers.
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == 0,
"{}:{} not allocated in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] |= bits;
}
/// Return an iterator over all available registers belonging to the register class `rc`.
///
/// This doesn't allocate anything from the set; use `take()` for that.
pub fn iter(&self, rc: RegClass) -> RegSetIter {
// Start by copying the RC mask. It is a single set bit for each register in the class.
let mut rsi = RegSetIter { regs: rc.mask };
// Mask out the unavailable units.
for idx in 0..self.avail.len() {
// If a single unit in a register is unavailable, the whole register can't be used.
// If a register straddles a word boundary, it will be marked as unavailable.
// There's an assertion in `cdsl/registers.py` to check for that.
for i in 0..rc.width {
rsi.regs[idx] &= self.avail[idx] >> i;
}
}
rsi
}
/// Check if any register units allocated out of this set interferes with units allocated out
/// of `other`.
///
/// This assumes that unused bits are 1.
pub fn interferes_with(&self, other: &RegisterSet) -> bool {
self.avail.iter().zip(&other.avail).any(
|(&x, &y)| (x | y) != !0,
)
}
/// Intersect this set of registers with `other`. This has the effect of removing any register
/// units from this set that are not in `other`.
pub fn intersect(&mut self, other: &RegisterSet) {
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
*x &= y;
}
}
/// Return an object that can display this register set, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
DisplayRegisterSet(self.clone(), regs.into())
}
}
/// Iterator over available registers in a register class.
pub struct RegSetIter {
regs: RegUnitMask,
}
impl Iterator for RegSetIter {
type Item = RegUnit;
fn next(&mut self) -> Option<RegUnit> {
let mut unit_offset = 0;
// Find the first set bit in `self.regs`.
for word in &mut self.regs {
if *word != 0 {
// Compute the register unit number from the lowest set bit in the word.
let unit = unit_offset + word.trailing_zeros() as RegUnit;
// Clear that lowest bit so we won't find it again.
*word &= *word - 1;
return Some(unit);
}
// How many register units were there in the word? This is a constant 32 for `u32` etc.
unit_offset += 8 * size_of_val(word) as RegUnit;
}
// All of `self.regs` is 0.
None
}
fn size_hint(&self) -> (usize, Option<usize>) {
let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
(bits, Some(bits))
}
}
impl ExactSizeIterator for RegSetIter {}
/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[")?;
match self.1 {
None => {
for w in &self.0.avail {
write!(f, " #{:08x}", w)?;
}
}
Some(reginfo) => {
let toprcs = reginfo
.banks
.iter()
.map(|b| b.first_toprc + b.num_toprcs)
.max()
.expect("No register banks");
for rc in &reginfo.classes[0..toprcs] {
if rc.width == 1 {
let bank = &reginfo.banks[rc.bank as usize];
write!(f, " {}: ", rc)?;
for offset in 0..bank.units {
let reg = bank.first_unit + offset;
if !rc.contains(reg) {
continue;
}
if !self.0.is_avail(rc, reg) {
write!(f, "-")?;
continue;
}
// Display individual registers as either the second letter of their
// name or the last digit of their number.
// This works for x86 (rax, rbx, ...) and for numbered regs.
write!(
f,
"{}",
bank.names
.get(offset as usize)
.and_then(|name| name.chars().nth(1))
.unwrap_or_else(
|| char::from_digit(u32::from(offset % 10), 10).unwrap(),
)
)?;
}
}
}
}
}
write!(f, " ]")
}
}
impl fmt::Display for RegisterSet {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use isa::registers::{RegClass, RegClassData};
use std::vec::Vec;
// Register classes for testing.
const GPR: RegClass = &RegClassData {
name: "GPR",
index: 0,
width: 1,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0xf0000000, 0x0000000f, 0],
info: &INFO,
};
const DPR: RegClass = &RegClassData {
name: "DPR",
index: 0,
width: 2,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0x50000000, 0x0000000a, 0],
info: &INFO,
};
const INFO: RegInfo = RegInfo {
banks: &[],
classes: &[],
};
#[test]
fn put_and_take() {
let mut regs = RegisterSet::new();
// `GPR` has the 8 register units 28-35.
assert_eq!(regs.iter(GPR).len(), 8);
assert_eq!(regs.iter(GPR).count(), 8);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
assert!(regs.is_avail(GPR, 29));
regs.take(&GPR, 29);
assert!(!regs.is_avail(GPR, 29));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
assert!(regs.is_avail(GPR, 30));
regs.take(&GPR, 30);
assert!(!regs.is_avail(GPR, 30));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
assert!(regs.is_avail(GPR, 32));
regs.take(&GPR, 32);
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 5);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
regs.free(&GPR, 30);
assert!(regs.is_avail(GPR, 30));
assert!(!regs.is_avail(GPR, 29));
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
regs.free(&GPR, 32);
assert!(regs.is_avail(GPR, 31));
assert!(!regs.is_avail(GPR, 29));
assert!(regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
}
#[test]
fn interference() {
let mut regs1 = RegisterSet::new();
let mut regs2 = RegisterSet::new();
assert!(!regs1.interferes_with(&regs2));
regs1.take(&GPR, 32);
assert!(!regs1.interferes_with(&regs2));
regs2.take(&GPR, 31);
assert!(!regs1.interferes_with(&regs2));
regs1.intersect(&regs2);
assert!(regs1.interferes_with(&regs2));
}
}

@@ -0,0 +1,390 @@
//! Reload pass
//!
//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
//! get a value with register affinity, and operands expecting a stack slot will get a value with
//! stack affinity.
//!
//! The secondary responsibility of the reload pass is to reuse values in registers as much as
//! possible to minimize the number of `fill` instructions needed. This must not cause the register
//! pressure limits to be exceeded.
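//!
//! As an illustrative sketch (the value numbers are made up, not from a real test case),
//! reloading a spilled value `v1` before a register-constrained use looks roughly like:
//!
//! ```text
//! v10 = fill v1        ; copy the stack value into a register
//! v2 = iadd v10, v3    ; this operand now sees a register value
//! ```
//!
//! while a register def whose live range wants a stack slot is rewritten as:
//!
//! ```text
//! v7 = inst ...
//! v2 = spill v7
//! ```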
use cursor::{Cursor, EncCursor};
use dominator_tree::DominatorTree;
use entity::{SparseMap, SparseMapValue};
use ir::{AbiParam, ArgumentLoc, InstBuilder};
use ir::{Ebb, Function, Inst, Value};
use isa::RegClass;
use isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
use regalloc::affinity::Affinity;
use regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use regalloc::liveness::Liveness;
use std::vec::Vec;
use timing;
use topo_order::TopoOrder;
/// Reusable data structures for the reload pass.
pub struct Reload {
candidates: Vec<ReloadCandidate>,
reloads: SparseMap<Value, ReloadedValue>,
}
/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
cur: EncCursor<'a>,
// Cached ISA information.
// We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
encinfo: EncInfo,
// References to contextual data structures we need.
domtree: &'a DominatorTree,
liveness: &'a mut Liveness,
topo: &'a mut TopoOrder,
candidates: &'a mut Vec<ReloadCandidate>,
reloads: &'a mut SparseMap<Value, ReloadedValue>,
}
impl Reload {
/// Create a new blank reload pass.
pub fn new() -> Self {
Self {
candidates: Vec::new(),
reloads: SparseMap::new(),
}
}
/// Clear all data structures in this reload pass.
pub fn clear(&mut self) {
self.candidates.clear();
self.reloads.clear();
}
/// Run the reload algorithm over `func`.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
domtree: &DominatorTree,
liveness: &mut Liveness,
topo: &mut TopoOrder,
tracker: &mut LiveValueTracker,
) {
let _tt = timing::ra_reload();
dbg!("Reload for:\n{}", func.display(isa));
let mut ctx = Context {
cur: EncCursor::new(func, isa),
encinfo: isa.encoding_info(),
domtree,
liveness,
topo,
candidates: &mut self.candidates,
reloads: &mut self.reloads,
};
ctx.run(tracker)
}
}
/// A reload candidate.
///
/// This represents a stack value that is used by the current instruction where a register is
/// needed.
struct ReloadCandidate {
argidx: usize,
value: Value,
regclass: RegClass,
}
/// A reloaded value.
///
/// This represents a value that has been reloaded into a register value from the stack.
struct ReloadedValue {
stack: Value,
reg: Value,
}
impl SparseMapValue<Value> for ReloadedValue {
fn key(&self) -> Value {
self.stack
}
}
impl<'a> Context<'a> {
fn run(&mut self, tracker: &mut LiveValueTracker) {
self.topo.reset(self.cur.func.layout.ebbs());
while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
self.visit_ebb(ebb, tracker);
}
}
fn visit_ebb(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
dbg!("Reloading {}:", ebb);
self.visit_ebb_header(ebb, tracker);
tracker.drop_dead_params();
// visit_ebb_header() places us at the first interesting instruction in the EBB.
while let Some(inst) = self.cur.current_inst() {
let encoding = self.cur.func.encodings[inst];
if encoding.is_legal() {
self.visit_inst(ebb, inst, encoding, tracker);
tracker.drop_dead(inst);
} else {
self.cur.next_inst();
}
}
}
/// Process the EBB parameters. Move to the next instruction in the EBB to be processed.
fn visit_ebb_header(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
let (liveins, args) = tracker.ebb_top(
ebb,
&self.cur.func.dfg,
self.liveness,
&self.cur.func.layout,
self.domtree,
);
if self.cur.func.layout.entry_block() == Some(ebb) {
debug_assert_eq!(liveins.len(), 0);
self.visit_entry_params(ebb, args);
} else {
self.visit_ebb_params(ebb, args);
}
}
/// Visit the parameters on the entry block.
/// These values have ABI constraints from the function signature.
fn visit_entry_params(&mut self, ebb: Ebb, args: &[LiveValue]) {
debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
self.cur.goto_first_inst(ebb);
for (arg_idx, arg) in args.iter().enumerate() {
let abi = self.cur.func.signature.params[arg_idx];
match abi.location {
ArgumentLoc::Reg(_) => {
if arg.affinity.is_stack() {
// An incoming register parameter was spilled. Replace the parameter value
// with a temporary register value that is immediately spilled.
let reg = self.cur.func.dfg.replace_ebb_param(
arg.value,
abi.value_type,
);
let affinity = Affinity::abi(&abi, self.cur.isa);
self.liveness.create_dead(reg, ebb, affinity);
self.insert_spill(ebb, arg.value, reg);
}
}
ArgumentLoc::Stack(_) => {
debug_assert!(arg.affinity.is_stack());
}
ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
}
}
}
fn visit_ebb_params(&mut self, ebb: Ebb, _args: &[LiveValue]) {
self.cur.goto_first_inst(ebb);
}
/// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction
/// that needs processing.
fn visit_inst(
&mut self,
ebb: Ebb,
inst: Inst,
encoding: Encoding,
tracker: &mut LiveValueTracker,
) {
self.cur.use_srcloc(inst);
// Get the operand constraints for `inst` that we are trying to satisfy.
let constraints = self.encinfo.operand_constraints(encoding).expect(
"Missing instruction encoding",
);
// Identify reload candidates.
debug_assert!(self.candidates.is_empty());
self.find_candidates(inst, constraints);
// Insert fill instructions before `inst` and replace `cand.value` with the filled value.
for cand in self.candidates.iter_mut() {
if let Some(reload) = self.reloads.get(cand.value) {
cand.value = reload.reg;
continue;
}
let reg = self.cur.ins().fill(cand.value);
let fill = self.cur.built_inst();
self.reloads.insert(ReloadedValue {
stack: cand.value,
reg,
});
cand.value = reg;
// Create a live range for the new reload.
let affinity = Affinity::Reg(cand.regclass.into());
self.liveness.create_dead(reg, fill, affinity);
self.liveness.extend_locally(
reg,
ebb,
inst,
&self.cur.func.layout,
);
}
// Rewrite instruction arguments.
//
// Only rewrite those arguments that were identified as candidates. This leaves EBB
// arguments on branches as-is without rewriting them. A spilled EBB argument needs to stay
// spilled because the matching EBB parameter is going to be in the same virtual register
// and therefore the same stack slot as the EBB argument value.
if !self.candidates.is_empty() {
let args = self.cur.func.dfg.inst_args_mut(inst);
while let Some(cand) = self.candidates.pop() {
args[cand.argidx] = cand.value;
}
}
// TODO: Reuse reloads for future instructions.
self.reloads.clear();
let (_throughs, _kills, defs) =
tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
// Advance to the next instruction so we can insert any spills after the instruction.
self.cur.next_inst();
// Rewrite register defs that need to be spilled.
//
// Change:
//
// v2 = inst ...
//
// Into:
//
// v7 = inst ...
// v2 = spill v7
//
// That way, we don't need to rewrite all future uses of v2.
for (lv, op) in defs.iter().zip(constraints.outs) {
if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack {
let value_type = self.cur.func.dfg.value_type(lv.value);
let reg = self.cur.func.dfg.replace_result(lv.value, value_type);
self.liveness.create_dead(reg, inst, Affinity::new(op));
self.insert_spill(ebb, lv.value, reg);
}
}
// Same thing for spilled call return values.
let retvals = &defs[constraints.outs.len()..];
if !retvals.is_empty() {
let sig = self.cur.func.dfg.call_signature(inst).expect(
"Extra results on non-call instruction",
);
for (i, lv) in retvals.iter().enumerate() {
let abi = self.cur.func.dfg.signatures[sig].returns[i];
debug_assert!(abi.location.is_reg());
if lv.affinity.is_stack() {
let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type);
self.liveness.create_dead(
reg,
inst,
Affinity::abi(&abi, self.cur.isa),
);
self.insert_spill(ebb, lv.value, reg);
}
}
}
}
// Find reload candidates for `inst` and add them to `self.candidates`.
//
// These are uses of spilled values where the operand constraint requires a register.
fn find_candidates(&mut self, inst: Inst, constraints: &RecipeConstraints) {
let args = self.cur.func.dfg.inst_args(inst);
for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
self.candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: op.regclass,
})
}
}
// If we only have the fixed arguments, we're done now.
let offset = constraints.ins.len();
if args.len() == offset {
return;
}
let var_args = &args[offset..];
// Handle ABI arguments.
if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
handle_abi_args(
self.candidates,
&self.cur.func.dfg.signatures[sig].params,
var_args,
offset,
self.cur.isa,
self.liveness,
);
} else if self.cur.func.dfg[inst].opcode().is_return() {
handle_abi_args(
self.candidates,
&self.cur.func.signature.returns,
var_args,
offset,
self.cur.isa,
self.liveness,
);
}
}
/// Insert a spill at `pos` and update data structures.
///
/// - Insert `stack = spill reg` at `pos`, and assign an encoding.
/// - Move the `stack` live range starting point to the new instruction.
/// - Extend the `reg` live range to reach the new instruction.
fn insert_spill(&mut self, ebb: Ebb, stack: Value, reg: Value) {
self.cur.ins().with_result(stack).spill(reg);
let inst = self.cur.built_inst();
// Update live ranges.
self.liveness.move_def_locally(stack, inst);
self.liveness.extend_locally(
reg,
ebb,
inst,
&self.cur.func.layout,
);
}
}
/// Find reload candidates in the instruction's ABI variable arguments. This handles both
/// return values and call arguments.
fn handle_abi_args(
candidates: &mut Vec<ReloadCandidate>,
abi_types: &[AbiParam],
var_args: &[Value],
offset: usize,
isa: &TargetIsa,
liveness: &Liveness,
) {
debug_assert_eq!(abi_types.len(), var_args.len());
for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
if abi.location.is_reg() {
let lv = liveness.get(arg).expect("Missing live range for ABI arg");
if lv.affinity.is_stack() {
candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: isa.regclass_for_abi_type(abi.value_type),
});
}
}
}
}

File diff suppressed because it is too large

@@ -0,0 +1,596 @@
//! Spilling pass.
//!
//! The spilling pass is the first to run after the liveness analysis. Its primary function is to
//! ensure that the register pressure never exceeds the number of available registers by moving
//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's
//! live range.
//!
//! Some instruction operand constraints may require additional registers to resolve. Since this
//! can cause spilling, the spilling pass is also responsible for resolving those constraints by
//! inserting copies. The extra constraints are:
//!
//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by
//! inserting a copy to a temporary value when necessary.
//! 2. When the same value is used more than once by an instruction, the operand constraints must
//! be compatible. Otherwise, the value must be copied into a new register for some of the
//! operands.
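//!
//! For example (an illustrative sketch with made-up value numbers), if `v1` is used by a tied
//! operand but is not killed by the instruction, the pass rewrites
//!
//! ```text
//! v3 = isub v1, v2
//! ```
//!
//! into
//!
//! ```text
//! v9 = copy v1
//! v3 = isub v9, v2
//! ```
//!
//! so that the tied operand kills the temporary `v9` while `v1` stays live.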
use cursor::{Cursor, EncCursor};
use dominator_tree::DominatorTree;
use ir::{Ebb, Function, Inst, InstBuilder, SigRef, Value, ValueLoc};
use isa::registers::{RegClassIndex, RegClassMask};
use isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa};
use regalloc::affinity::Affinity;
use regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use regalloc::liveness::Liveness;
use regalloc::pressure::Pressure;
use regalloc::virtregs::VirtRegs;
use std::fmt;
use std::vec::Vec;
use timing;
use topo_order::TopoOrder;
/// Persistent data structures for the spilling pass.
pub struct Spilling {
spills: Vec<Value>,
reg_uses: Vec<RegUse>,
}
/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
// Current instruction as well as reference to function and ISA.
cur: EncCursor<'a>,
// Cached ISA information.
reginfo: RegInfo,
encinfo: EncInfo,
// References to contextual data structures we need.
domtree: &'a DominatorTree,
liveness: &'a mut Liveness,
virtregs: &'a VirtRegs,
topo: &'a mut TopoOrder,
// Current register pressure.
pressure: Pressure,
// Values spilled for the current instruction. These values have already been removed from the
// pressure tracker, but they are still present in the live value tracker and their affinity
// hasn't been changed yet.
spills: &'a mut Vec<Value>,
// Uses of register values in the current instruction.
reg_uses: &'a mut Vec<RegUse>,
}
impl Spilling {
/// Create a new spilling data structure.
pub fn new() -> Self {
Self {
spills: Vec::new(),
reg_uses: Vec::new(),
}
}
/// Clear all data structures in this spilling pass.
pub fn clear(&mut self) {
self.spills.clear();
self.reg_uses.clear();
}
/// Run the spilling algorithm over `func`.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
domtree: &DominatorTree,
liveness: &mut Liveness,
virtregs: &VirtRegs,
topo: &mut TopoOrder,
tracker: &mut LiveValueTracker,
) {
let _tt = timing::ra_spilling();
dbg!("Spilling for:\n{}", func.display(isa));
let reginfo = isa.register_info();
let usable_regs = isa.allocatable_registers(func);
let mut ctx = Context {
cur: EncCursor::new(func, isa),
reginfo: isa.register_info(),
encinfo: isa.encoding_info(),
domtree,
liveness,
virtregs,
topo,
pressure: Pressure::new(&reginfo, &usable_regs),
spills: &mut self.spills,
reg_uses: &mut self.reg_uses,
};
ctx.run(tracker)
}
}
impl<'a> Context<'a> {
fn run(&mut self, tracker: &mut LiveValueTracker) {
self.topo.reset(self.cur.func.layout.ebbs());
while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
self.visit_ebb(ebb, tracker);
}
}
fn visit_ebb(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
dbg!("Spilling {}:", ebb);
self.cur.goto_top(ebb);
self.visit_ebb_header(ebb, tracker);
tracker.drop_dead_params();
self.process_spills(tracker);
while let Some(inst) = self.cur.next_inst() {
if let Some(constraints) =
self.encinfo.operand_constraints(
self.cur.func.encodings[inst],
)
{
self.visit_inst(inst, ebb, constraints, tracker);
} else {
let (_throughs, kills) = tracker.process_ghost(inst);
self.free_regs(kills);
}
tracker.drop_dead(inst);
self.process_spills(tracker);
}
}
// Take all live registers in `regs` from the pressure set.
// This doesn't cause any spilling; it is assumed there are enough registers.
fn take_live_regs(&mut self, regs: &[LiveValue]) {
for lv in regs {
if !lv.is_dead {
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
self.pressure.take(rc);
}
}
}
}
// Free all registers in `kills` from the pressure set.
fn free_regs(&mut self, kills: &[LiveValue]) {
for lv in kills {
if let Affinity::Reg(rci) = lv.affinity {
if !self.spills.contains(&lv.value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
}
}
}
}
// Free all dead registers in `regs` from the pressure set.
fn free_dead_regs(&mut self, regs: &[LiveValue]) {
for lv in regs {
if lv.is_dead {
if let Affinity::Reg(rci) = lv.affinity {
if !self.spills.contains(&lv.value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
}
}
}
}
}
fn visit_ebb_header(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
let (liveins, params) = tracker.ebb_top(
ebb,
&self.cur.func.dfg,
self.liveness,
&self.cur.func.layout,
self.domtree,
);
// Count the live-in registers. These should already fit in registers; they did at the
// dominator.
self.pressure.reset();
self.take_live_regs(liveins);
// An EBB can have an arbitrary (up to 2^16...) number of parameters, so they are not
// guaranteed to fit in registers.
for lv in params {
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
'try_take: while let Err(mask) = self.pressure.take_transient(rc) {
dbg!("Need {} reg for EBB param {}", rc, lv.value);
match self.spill_candidate(mask, liveins) {
Some(cand) => {
dbg!(
"Spilling live-in {} to make room for {} EBB param {}",
cand,
rc,
lv.value
);
self.spill_reg(cand);
}
None => {
// We can't spill any of the live-in registers, so we have to spill an
// EBB argument. Since the current spill metric would consider all the
// EBB arguments equal, just spill the present register.
dbg!("Spilling {} EBB argument {}", rc, lv.value);
// Since `spill_reg` will free a register, add the current one here.
self.pressure.take(rc);
self.spill_reg(lv.value);
break 'try_take;
}
}
}
}
}
// The transient pressure counts for the EBB arguments are accurate. Just preserve them.
self.pressure.preserve_transient();
self.free_dead_regs(params);
}
fn visit_inst(
&mut self,
inst: Inst,
ebb: Ebb,
constraints: &RecipeConstraints,
tracker: &mut LiveValueTracker,
) {
dbg!("Inst {}, {}", self.cur.display_inst(inst), self.pressure);
debug_assert_eq!(self.cur.current_inst(), Some(inst));
debug_assert_eq!(self.cur.current_ebb(), Some(ebb));
// We may need to resolve register constraints if there are any noteworthy uses.
debug_assert!(self.reg_uses.is_empty());
self.collect_reg_uses(inst, ebb, constraints);
// Calls usually have fixed register uses.
let call_sig = self.cur.func.dfg.call_signature(inst);
if let Some(sig) = call_sig {
self.collect_abi_reg_uses(inst, sig);
}
if !self.reg_uses.is_empty() {
self.process_reg_uses(inst, tracker);
}
// Update the live value tracker with this instruction.
let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
// Remove kills from the pressure tracker.
self.free_regs(kills);
// If inst is a call, spill all register values that are live across the call.
// This means that we don't currently take advantage of callee-saved registers.
// TODO: Be more sophisticated.
if call_sig.is_some() {
for lv in throughs {
if lv.affinity.is_reg() && !self.spills.contains(&lv.value) {
self.spill_reg(lv.value);
}
}
}
// Make sure we have enough registers for the register defs.
// Dead defs are included here. They need a register too.
// No need to process call return values, they are in fixed registers.
for op in constraints.outs {
if op.kind != ConstraintKind::Stack {
// Add register def to pressure, spill if needed.
while let Err(mask) = self.pressure.take_transient(op.regclass) {
dbg!("Need {} reg from {} throughs", op.regclass, throughs.len());
match self.spill_candidate(mask, throughs) {
Some(cand) => self.spill_reg(cand),
None => {
panic!(
"Ran out of {} registers for {}",
op.regclass,
self.cur.display_inst(inst)
)
}
}
}
}
}
self.pressure.reset_transient();
// Restore pressure state, compute pressure with affinities from `defs`.
// Exclude dead defs. Includes call return values.
// This won't cause spilling.
self.take_live_regs(defs);
}
// Collect register uses that are noteworthy in one of the following ways:
//
// 1. It's a fixed register constraint.
// 2. It's a use of a spilled value.
// 3. It's a tied register constraint and the value isn't killed.
//
// We are assuming here that if a value is used both by a fixed register operand and a register
// class operand, the two are compatible. We are also assuming that two register class
// operands are always compatible.
fn collect_reg_uses(&mut self, inst: Inst, ebb: Ebb, constraints: &RecipeConstraints) {
let args = self.cur.func.dfg.inst_args(inst);
for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
let mut reguse = RegUse::new(arg, idx, op.regclass.into());
let lr = &self.liveness[arg];
let ctx = self.liveness.context(&self.cur.func.layout);
match op.kind {
ConstraintKind::Stack => continue,
ConstraintKind::FixedReg(_) => reguse.fixed = true,
ConstraintKind::Tied(_) => {
// A tied operand must kill the used value.
reguse.tied = !lr.killed_at(inst, ebb, ctx);
}
ConstraintKind::FixedTied(_) => {
reguse.fixed = true;
reguse.tied = !lr.killed_at(inst, ebb, ctx);
}
ConstraintKind::Reg => {}
}
if lr.affinity.is_stack() {
reguse.spilled = true;
}
// Only collect the interesting register uses.
if reguse.fixed || reguse.tied || reguse.spilled {
dbg!(" reguse: {}", reguse);
self.reg_uses.push(reguse);
}
}
}
// Collect register uses from the ABI input constraints.
fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) {
let fixed_args = self.cur.func.dfg[inst]
.opcode()
.constraints()
.fixed_value_arguments();
let args = self.cur.func.dfg.inst_variable_args(inst);
for (idx, (abi, &arg)) in
self.cur.func.dfg.signatures[sig]
.params
.iter()
.zip(args)
.enumerate()
{
if abi.location.is_reg() {
let (rci, spilled) = match self.liveness[arg].affinity {
Affinity::Reg(rci) => (rci, false),
Affinity::Stack => (
self.cur.isa.regclass_for_abi_type(abi.value_type).into(),
true,
),
Affinity::None => panic!("Missing affinity for {}", arg),
};
let mut reguse = RegUse::new(arg, fixed_args + idx, rci);
reguse.fixed = true;
reguse.spilled = spilled;
self.reg_uses.push(reguse);
}
}
}
// Process multiple register uses to resolve potential conflicts.
//
// Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary.
// Trigger spilling if any of the temporaries cause the register pressure to become too high.
//
// Leave `self.reg_uses` empty.
fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) {
// We're looking for multiple uses of the same value, so start by sorting by value. The
// secondary `opidx` key makes it possible to use an unstable (non-allocating) sort.
self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx));
for i in 0..self.reg_uses.len() {
let ru = self.reg_uses[i];
// Do we need to insert a copy for this use?
let need_copy = if ru.tied {
true
} else if ru.fixed {
// This is a fixed register use which doesn't necessarily require a copy.
// Make a copy only if this is not the first use of the value.
self.reg_uses
.get(i.wrapping_sub(1))
.map(|ru2| ru2.value == ru.value)
.unwrap_or(false)
} else {
false
};
if need_copy {
let copy = self.insert_copy(ru.value, ru.rci);
self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy;
}
// Even if we don't insert a copy, we may need to account for register pressure for the
// reload pass.
if need_copy || ru.spilled {
let rc = self.reginfo.rc(ru.rci);
while let Err(mask) = self.pressure.take_transient(rc) {
dbg!("Copy of {} reg causes spill", rc);
// Spill a live register that is *not* used by the current instruction.
// Spilling a use wouldn't help.
//
// Do allow spilling of EBB arguments on branches. This is safe since we spill
// the whole virtual register which includes the matching EBB parameter value
// at the branch destination. It is also necessary since there can be
// arbitrarily many EBB arguments.
match {
let args = if self.cur.func.dfg[inst].opcode().is_branch() {
self.cur.func.dfg.inst_fixed_args(inst)
} else {
self.cur.func.dfg.inst_args(inst)
};
self.spill_candidate(
mask,
tracker.live().iter().filter(|lv| !args.contains(&lv.value)),
)
} {
Some(cand) => self.spill_reg(cand),
None => {
panic!(
"Ran out of {} registers when inserting copy before {}",
rc,
self.cur.display_inst(inst)
)
}
}
}
}
}
self.pressure.reset_transient();
self.reg_uses.clear()
}
// Find a spill candidate from `candidates` whose top-level register class is in `mask`.
fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value>
where
II: IntoIterator<Item = &'ii LiveValue>,
{
// Find the best viable spill candidate.
//
// The very simple strategy implemented here is to spill the value with the earliest def in
// the reverse post-order. This strategy depends on a good reload pass to generate good
// code.
//
// We know that all candidate defs dominate the current instruction, so one of them will
// dominate the others. That is the earliest def.
candidates
.into_iter()
.filter_map(|lv| {
// Viable candidates are registers in one of the `mask` classes, and not already in
// the spill set.
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) {
// Here, `lv` is a viable spill candidate.
return Some(lv.value);
}
}
None
})
.min_by(|&a, &b| {
// Find the minimum candidate according to the RPO of their defs.
self.domtree.rpo_cmp(
self.cur.func.dfg.value_def(a),
self.cur.func.dfg.value_def(b),
&self.cur.func.layout,
)
})
}
/// Spill `value` immediately by
///
/// 1. Changing its affinity to `Stack` which marks the spill.
/// 2. Removing the value from the pressure tracker.
/// 3. Adding the value to `self.spills` for later reference by `process_spills`.
///
/// Note that this does not update the cached affinity in the live value tracker. Call
/// `process_spills` to do that.
fn spill_reg(&mut self, value: Value) {
if let Affinity::Reg(rci) = self.liveness.spill(value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
self.spills.push(value);
dbg!("Spilled {}:{} -> {}", value, rc, self.pressure);
} else {
panic!("Cannot spill {} that was already on the stack", value);
}
// Assign a spill slot for the whole virtual register.
let ss = self.cur.func.stack_slots.make_spill_slot(
self.cur.func.dfg.value_type(value),
);
for &v in self.virtregs.congruence_class(&value) {
self.liveness.spill(v);
self.cur.func.locations[v] = ValueLoc::Stack(ss);
}
}
/// Process any pending spills in the `self.spills` vector.
///
/// It is assumed that spills are removed from the pressure tracker immediately, see
/// `spill_reg` above.
///
/// We also need to update the live range affinity and remove spilled values from the live
/// value tracker.
fn process_spills(&mut self, tracker: &mut LiveValueTracker) {
if !self.spills.is_empty() {
tracker.process_spills(|v| self.spills.contains(&v));
self.spills.clear()
}
}
/// Insert a `copy value` before the current instruction and give it a live range extending to
/// the current instruction.
///
/// Returns the new local value created.
fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value {
let copy = self.cur.ins().copy(value);
let inst = self.cur.built_inst();
// Update live ranges.
self.liveness.create_dead(copy, inst, Affinity::Reg(rci));
self.liveness.extend_locally(
copy,
self.cur.func.layout.pp_ebb(inst),
self.cur.current_inst().expect("must be at an instruction"),
&self.cur.func.layout,
);
copy
}
}
/// Struct representing a register use of a value.
/// Used to detect multiple uses of the same value with incompatible register constraints.
#[derive(Clone, Copy)]
struct RegUse {
value: Value,
opidx: u16,
// Register class required by the use.
rci: RegClassIndex,
// A use with a fixed register constraint.
fixed: bool,
// A register use of a spilled value.
spilled: bool,
// A use with a tied register constraint *and* the used value is not killed.
tied: bool,
}
impl RegUse {
fn new(value: Value, idx: usize, rci: RegClassIndex) -> RegUse {
RegUse {
value,
opidx: idx as u16,
rci,
fixed: false,
spilled: false,
tied: false,
}
}
}
impl fmt::Display for RegUse {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}@op{}", self.value, self.opidx)?;
if self.fixed {
write!(f, "/fixed")?;
}
if self.spilled {
write!(f, "/spilled")?;
}
if self.tied {
write!(f, "/tied")?;
}
Ok(())
}
}

View File

@@ -0,0 +1,503 @@
//! Virtual registers.
//!
//! A virtual register is a set of related SSA values whose live ranges don't interfere. If all the
//! values in a virtual register are assigned to the same location, the output will contain fewer
//! copy instructions.
//!
//! A virtual register is typically built by merging together SSA values that are "phi-related" -
//! that is, one value is passed as an EBB argument to a branch and the other is the EBB parameter
//! value itself.
//!
//! If any values in a virtual register are spilled, they will use the same stack slot. This avoids
//! memory-to-memory copies when a spilled value is passed as an EBB argument.
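//!
//! For example (a hypothetical IR snippet, not taken from this crate's tests):
//!
//! ```text
//! ebb1:
//!     brnz v1, ebb2(v5)   ; v5 is passed as an EBB argument
//! ebb2(v9: i32):          ; v9 is the matching EBB parameter
//! ```
//!
//! Here `v5` and `v9` are phi-related, so they are candidates for the same virtual register.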
use dbg::DisplayList;
use dominator_tree::DominatorTreePreorder;
use entity::EntityRef;
use entity::{EntityList, ListPool};
use entity::{EntityMap, Keys, PrimaryMap};
use ir::{Function, Value};
use packed_option::PackedOption;
use ref_slice::ref_slice;
use std::cmp::Ordering;
use std::fmt;
use std::vec::Vec;
/// A virtual register reference.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct VirtReg(u32);
entity_impl!(VirtReg, "vreg");
type ValueList = EntityList<Value>;
/// Collection of virtual registers.
///
/// Each virtual register is a list of values. Also maintain a map from values to their unique
/// virtual register, if any.
pub struct VirtRegs {
/// Memory pool for the value lists.
pool: ListPool<Value>,
/// The primary table of virtual registers.
vregs: PrimaryMap<VirtReg, ValueList>,
/// Allocated virtual register numbers that are no longer in use.
unused_vregs: Vec<VirtReg>,
/// Each value belongs to at most one virtual register.
value_vregs: EntityMap<Value, PackedOption<VirtReg>>,
/// Table used during the union-find phase while `vregs` is empty.
union_find: EntityMap<Value, i32>,
/// Values that have been activated in the `union_find` table, but not yet added to any virtual
/// registers by the `finish_union_find()` function.
pending_values: Vec<Value>,
}
impl VirtRegs {
/// Create a new virtual register collection.
pub fn new() -> Self {
Self {
pool: ListPool::new(),
vregs: PrimaryMap::new(),
unused_vregs: Vec::new(),
value_vregs: EntityMap::new(),
union_find: EntityMap::new(),
pending_values: Vec::new(),
}
}
/// Clear all virtual registers.
pub fn clear(&mut self) {
self.vregs.clear();
self.unused_vregs.clear();
self.value_vregs.clear();
self.pool.clear();
self.union_find.clear();
self.pending_values.clear();
}
/// Get the virtual register containing `value`, if any.
pub fn get(&self, value: Value) -> Option<VirtReg> {
self.value_vregs[value].into()
}
/// Get the list of values in `vreg`.
pub fn values(&self, vreg: VirtReg) -> &[Value] {
self.vregs[vreg].as_slice(&self.pool)
}
/// Get an iterator over all virtual registers.
pub fn all_virtregs(&self) -> Keys<VirtReg> {
self.vregs.keys()
}
/// Get the congruence class of `value`.
///
/// If `value` belongs to a virtual register, the congruence class consists of all the values in
/// that virtual register. Otherwise it is just the value itself.
pub fn congruence_class<'a, 'b>(&'a self, value: &'b Value) -> &'b [Value]
where
'a: 'b,
{
self.get(*value).map(|vr| self.values(vr)).unwrap_or_else(
|| {
ref_slice(value)
},
)
}
/// Check if `a` and `b` belong to the same congruence class.
pub fn same_class(&self, a: Value, b: Value) -> bool {
match (self.get(a), self.get(b)) {
(Some(va), Some(vb)) => va == vb,
_ => a == b,
}
}
/// Sort the values in `vreg` according to the dominator tree pre-order.
///
/// Returns the slice of sorted values which `values(vreg)` will also return from now on.
pub fn sort_values(
&mut self,
vreg: VirtReg,
func: &Function,
preorder: &DominatorTreePreorder,
) -> &[Value] {
let s = self.vregs[vreg].as_mut_slice(&mut self.pool);
s.sort_unstable_by(|&a, &b| preorder.pre_cmp_def(a, b, func));
s
}
/// Insert a single value into a sorted virtual register.
///
/// It is assumed that the virtual register containing `big` is already sorted by
/// `sort_values()`, and that `single` does not already belong to a virtual register.
///
/// If `big` is not part of a virtual register, one will be created.
pub fn insert_single(
&mut self,
big: Value,
single: Value,
func: &Function,
preorder: &DominatorTreePreorder,
) -> VirtReg {
debug_assert_eq!(self.get(single), None, "Expected singleton {}", single);
// Make sure `big` has a vreg.
let vreg = self.get(big).unwrap_or_else(|| {
let vr = self.alloc();
self.vregs[vr].push(big, &mut self.pool);
self.value_vregs[big] = vr.into();
vr
});
// Determine the insertion position for `single`.
let index = match self.values(vreg).binary_search_by(
|&v| preorder.pre_cmp_def(v, single, func),
) {
Ok(_) => panic!("{} already in {}", single, vreg),
Err(i) => i,
};
self.vregs[vreg].insert(index, single, &mut self.pool);
self.value_vregs[single] = vreg.into();
vreg
}
/// Remove a virtual register.
///
/// The values in `vreg` become singletons, and the virtual register number may be reused in
/// the future.
pub fn remove(&mut self, vreg: VirtReg) {
// Start by reassigning all the values.
for &v in self.vregs[vreg].as_slice(&self.pool) {
let old = self.value_vregs[v].take();
debug_assert_eq!(old, Some(vreg));
}
self.vregs[vreg].clear(&mut self.pool);
self.unused_vregs.push(vreg);
}
/// Allocate a new empty virtual register.
fn alloc(&mut self) -> VirtReg {
self.unused_vregs.pop().unwrap_or_else(|| {
self.vregs.push(Default::default())
})
}
/// Unify `values` into a single virtual register.
///
/// The values in the slice can be singletons or they can belong to a virtual register already.
/// If a value belongs to a virtual register, all of the values in that register must be
/// present.
///
/// The values are assumed to already be in topological order.
pub fn unify(&mut self, values: &[Value]) -> VirtReg {
// Start by clearing all virtual registers involved.
let mut singletons = 0;
let mut cleared = 0;
for &val in values {
match self.get(val) {
None => singletons += 1,
Some(vreg) => {
if !self.vregs[vreg].is_empty() {
cleared += self.vregs[vreg].len(&self.pool);
self.vregs[vreg].clear(&mut self.pool);
self.unused_vregs.push(vreg);
}
}
}
}
debug_assert_eq!(
values.len(),
singletons + cleared,
"Can't unify partial virtual registers"
);
let vreg = self.alloc();
self.vregs[vreg].extend(values.iter().cloned(), &mut self.pool);
for &v in values {
self.value_vregs[v] = vreg.into();
}
vreg
}
}
impl fmt::Display for VirtRegs {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for vreg in self.all_virtregs() {
write!(f, "\n{} = {}", vreg, DisplayList(self.values(vreg)))?;
}
Ok(())
}
}
/// Expanded version of a union-find table entry.
enum UFEntry {
/// This value is a set leader. The embedded number is the set's rank.
Rank(u32),
/// This value belongs to the same set as the linked value.
Link(Value),
}
/// The `union_find` table contains `i32` entries that are interpreted as follows:
///
/// x = 0: The value belongs to its own singleton set.
/// x > 0: The value is the leader of a set with rank x.
/// x < 0: The value belongs to the same set as the value numbered !x.
///
/// The rank of a set is an upper bound on the number of links that must be followed from a member
/// of the set to the set leader.
///
/// A singleton set is the same as a set with rank 0. It contains only the leader value.
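///
/// A small sketch of the encoding (the concrete numbers are illustrative only):
///
/// ```text
/// decode(0)  -> Rank(0)    // singleton set: the value is its own leader
/// decode(3)  -> Rank(3)    // leader of a set with rank 3
/// decode(-3) -> Link(v2)   // !(-3) == 2, so this entry links to the value numbered 2
/// ```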
impl UFEntry {
/// Decode a table entry.
fn decode(x: i32) -> UFEntry {
if x < 0 {
UFEntry::Link(Value::new((!x) as usize))
} else {
UFEntry::Rank(x as u32)
}
}
/// Encode a link entry.
fn encode_link(v: Value) -> i32 {
!(v.index() as i32)
}
}
/// Union-find algorithm for building virtual registers.
///
/// Before values are added to virtual registers, it is possible to use a union-find algorithm to
/// construct virtual registers efficiently. The support implemented here is used as follows:
///
/// 1. Repeatedly call the `union(a, b)` method to request that `a` and `b` are placed in the same
/// virtual register.
/// 2. When done, call `finish_union_find()` to construct the virtual register sets based on the
/// `union()` calls.
///
/// The values that were passed to `union(a, b)` must not belong to any existing virtual registers
/// by the time `finish_union_find()` is called.
///
/// For more information on the algorithm implemented here, see Chapter 21 "Data Structures for
/// Disjoint Sets" of Cormen, Leiserson, Rivest, Stein, "Introduction to algorithms", 3rd Ed.
///
/// The [Wikipedia entry on disjoint-set data
/// structures](https://en.wikipedia.org/wiki/Disjoint-set_data_structure) is also good.
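///
/// A minimal usage sketch (the values `a`, `b`, and `c` are hypothetical; compare the unit tests
/// at the bottom of this file):
///
/// ```ignore
/// let mut vregs = VirtRegs::new();
/// vregs.union(a, b);             // request that `a` and `b` share a virtual register
/// vregs.union(b, c);             // `a`, `b`, and `c` now end up in the same set
/// vregs.finish_union_find(None); // build the actual virtual registers
/// assert!(vregs.same_class(a, c));
/// ```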
impl VirtRegs {
/// Find the leader value and rank of the set containing `v`.
/// Compress the path if needed.
fn find(&mut self, val: Value) -> (Value, u32) {
match UFEntry::decode(self.union_find[val]) {
UFEntry::Rank(rank) => (val, rank),
UFEntry::Link(parent) => {
// TODO: This recursion would be more efficient as an iteration that pushes
// elements onto a SmallVector.
let found = self.find(parent);
// Compress the path if needed.
if found.0 != parent {
self.union_find[val] = UFEntry::encode_link(found.0);
}
found
}
}
}
/// Union the two sets containing `a` and `b`.
///
/// This ensures that `a` and `b` will belong to the same virtual register after calling
/// `finish_union_find()`.
pub fn union(&mut self, a: Value, b: Value) {
let (leader_a, rank_a) = self.find(a);
let (leader_b, rank_b) = self.find(b);
if leader_a == leader_b {
return;
}
// The first time we see a value, its rank will be 0. Add it to the list of pending values.
if rank_a == 0 {
debug_assert_eq!(a, leader_a);
self.pending_values.push(a);
}
if rank_b == 0 {
debug_assert_eq!(b, leader_b);
self.pending_values.push(b);
}
// Merge into the set with the greater rank. This preserves the invariant that the rank is
// an upper bound on the number of links to the leader.
match rank_a.cmp(&rank_b) {
Ordering::Less => {
self.union_find[leader_a] = UFEntry::encode_link(leader_b);
}
Ordering::Greater => {
self.union_find[leader_b] = UFEntry::encode_link(leader_a);
}
Ordering::Equal => {
// When the two sets have the same rank, we arbitrarily pick the a-set to preserve.
// We need to increase the rank by one since the elements in the b-set are now one
// link further away from the leader.
self.union_find[leader_a] += 1;
self.union_find[leader_b] = UFEntry::encode_link(leader_a);
}
}
}
/// Compute virtual registers based on previous calls to `union(a, b)`.
///
/// This terminates the union-find algorithm, so the next time `union()` is called, it is for a
/// new independent batch of values.
///
/// The values in each virtual register will be ordered according to when they were first
/// passed to `union()`, but backwards. It is expected that `sort_values()` will be used to
/// create a more sensible value order.
///
/// The new virtual registers will be appended to `new_vregs`, if present.
pub fn finish_union_find(&mut self, mut new_vregs: Option<&mut Vec<VirtReg>>) {
debug_assert_eq!(
self.pending_values.iter().find(|&&v| self.get(v).is_some()),
None,
"Values participating in union-find must not belong to existing virtual registers"
);
while let Some(val) = self.pending_values.pop() {
let (leader, _) = self.find(val);
// Get the vreg for `leader`, or create it.
let vreg = self.get(leader).unwrap_or_else(|| {
// Allocate a vreg for `leader`, but leave it empty.
let vr = self.alloc();
if let Some(ref mut vec) = new_vregs {
vec.push(vr);
}
self.value_vregs[leader] = vr.into();
vr
});
// Push values in `pending_values` order, including when `val == leader`.
self.vregs[vreg].push(val, &mut self.pool);
self.value_vregs[val] = vreg.into();
// Clear the entry in the union-find table. The `find(val)` call may still look at this
// entry in a future iteration, but that is ok. It will return a rank 0 leader that has
// already been assigned to the correct virtual register.
self.union_find[val] = 0;
}
// We do *not* call `union_find.clear()` here because re-initializing the table for
// sparse use takes time linear in the number of values in the function. Instead we reset
// the entries that are known to be non-zero in the loop above.
}
}
#[cfg(test)]
mod test {
use super::*;
use entity::EntityRef;
use ir::Value;
#[test]
fn empty_union_find() {
let mut vregs = VirtRegs::new();
vregs.finish_union_find(None);
assert_eq!(vregs.all_virtregs().count(), 0);
}
#[test]
fn union_self() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
vregs.union(v1, v1);
vregs.finish_union_find(None);
assert_eq!(vregs.get(v1), None);
assert_eq!(vregs.all_virtregs().count(), 0);
}
#[test]
fn union_pair() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
vregs.union(v1, v2);
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v2, v1]);
assert_eq!(vregs.congruence_class(&v2), &[v2, v1]);
assert_eq!(vregs.all_virtregs().count(), 1);
}
#[test]
fn union_pair_backwards() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
vregs.union(v2, v1);
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v1, v2]);
assert_eq!(vregs.congruence_class(&v2), &[v1, v2]);
assert_eq!(vregs.all_virtregs().count(), 1);
}
#[test]
fn union_tree() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
let v3 = Value::new(3);
let v4 = Value::new(4);
vregs.union(v2, v4);
vregs.union(v3, v1);
// Leaders: v2, v3
vregs.union(v4, v1);
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]);
assert_eq!(vregs.all_virtregs().count(), 1);
}
#[test]
fn union_two() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
let v3 = Value::new(3);
let v4 = Value::new(4);
vregs.union(v2, v4);
vregs.union(v3, v1);
// Leaders: v2, v3
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v1, v3]);
assert_eq!(vregs.congruence_class(&v2), &[v4, v2]);
assert_eq!(vregs.congruence_class(&v3), &[v1, v3]);
assert_eq!(vregs.congruence_class(&v4), &[v4, v2]);
assert_eq!(vregs.all_virtregs().count(), 2);
}
#[test]
fn union_uneven() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
let v3 = Value::new(3);
let v4 = Value::new(4);
vregs.union(v2, v4); // Rank 0-0
vregs.union(v3, v2); // Rank 0-1
vregs.union(v2, v1); // Rank 1-0
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]);
assert_eq!(vregs.all_virtregs().count(), 1);
}
}