Merge branch 'master' into no_std
This commit is contained in:
226
lib/codegen/src/abi.rs
Normal file
226
lib/codegen/src/abi.rs
Normal file
@@ -0,0 +1,226 @@
|
||||
//! Common helper code for ABI lowering.
|
||||
//!
|
||||
//! This module provides functions and data structures that are useful for implementing the
|
||||
//! `TargetIsa::legalize_signature()` method.
|
||||
|
||||
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
|
||||
use std::cmp::Ordering;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Legalization action to perform on a single argument or return value when converting a
/// signature.
///
/// An argument may go through a sequence of legalization steps before it reaches the final
/// `Assign` action.
#[derive(Clone, Copy, Debug)]
pub enum ArgAction {
    /// Assign the argument to the given location.
    Assign(ArgumentLoc),

    /// Convert the argument, then call again.
    ///
    /// This action can split an integer type into two smaller integer arguments, or it can split a
    /// SIMD vector into halves.
    // The assigner is re-invoked on the converted argument(s) until it returns `Assign`.
    Convert(ValueConversion),
}
|
||||
|
||||
impl From<ArgumentLoc> for ArgAction {
|
||||
fn from(x: ArgumentLoc) -> ArgAction {
|
||||
ArgAction::Assign(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ValueConversion> for ArgAction {
|
||||
fn from(x: ValueConversion) -> ArgAction {
|
||||
ArgAction::Convert(x)
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalization action to be applied to a value that is being passed to or from a legalized ABI.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValueConversion {
    /// Split an integer type into low and high parts, using `isplit`.
    IntSplit,

    /// Split a vector type into halves with identical lane types, using `vsplit`.
    VectorSplit,

    /// Bit-cast to an integer type of the same size.
    // Used so a float can subsequently be split as an integer (e.g. `f64` in two 32-bit regs).
    IntBits,

    /// Sign-extend integer value to the required type.
    Sext(Type),

    /// Unsigned zero-extend value to the required type.
    Uext(Type),
}
|
||||
|
||||
impl ValueConversion {
|
||||
/// Apply this conversion to a type, return the converted type.
|
||||
pub fn apply(self, ty: Type) -> Type {
|
||||
match self {
|
||||
ValueConversion::IntSplit => ty.half_width().expect("Integer type too small to split"),
|
||||
ValueConversion::VectorSplit => ty.half_vector().expect("Not a vector"),
|
||||
ValueConversion::IntBits => Type::int(ty.bits()).expect("Bad integer size"),
|
||||
ValueConversion::Sext(nty) |
|
||||
ValueConversion::Uext(nty) => nty,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a split conversion that results in two arguments?
|
||||
pub fn is_split(self) -> bool {
|
||||
match self {
|
||||
ValueConversion::IntSplit |
|
||||
ValueConversion::VectorSplit => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Common trait for assigning arguments to registers or stack locations.
///
/// This will be implemented by individual ISAs.
pub trait ArgAssigner {
    /// Pick an assignment action for function argument (or return value) `arg`.
    // May be called repeatedly for the same logical argument after each `Convert` action.
    fn assign(&mut self, arg: &AbiParam) -> ArgAction;
}
|
||||
|
||||
/// Legalize the arguments in `args` using the given argument assigner.
|
||||
///
|
||||
/// This function can be used for both arguments and return values.
|
||||
pub fn legalize_args<AA: ArgAssigner>(args: &mut Vec<AbiParam>, aa: &mut AA) {
|
||||
// Iterate over the arguments.
|
||||
// We may need to mutate the vector in place, so don't use a normal iterator, and clone the
|
||||
// argument to avoid holding a reference.
|
||||
let mut argno = 0;
|
||||
while let Some(arg) = args.get(argno).cloned() {
|
||||
// Leave the pre-assigned arguments alone.
|
||||
// We'll assume that they don't interfere with our assignments.
|
||||
if arg.location.is_assigned() {
|
||||
argno += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
match aa.assign(&arg) {
|
||||
// Assign argument to a location and move on to the next one.
|
||||
ArgAction::Assign(loc) => {
|
||||
args[argno].location = loc;
|
||||
argno += 1;
|
||||
}
|
||||
// Split this argument into two smaller ones. Then revisit both.
|
||||
ArgAction::Convert(conv) => {
|
||||
let new_arg = AbiParam {
|
||||
value_type: conv.apply(arg.value_type),
|
||||
..arg
|
||||
};
|
||||
args[argno].value_type = new_arg.value_type;
|
||||
if conv.is_split() {
|
||||
args.insert(argno + 1, new_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine the right action to take when passing a `have` value type to a call signature where
|
||||
/// the next argument is `arg` which has a different value type.
|
||||
///
|
||||
/// The signature legalization process in `legalize_args` above can replace a single argument value
|
||||
/// with multiple arguments of smaller types. It can also change the type of an integer argument to
|
||||
/// a larger integer type, requiring the smaller value to be sign- or zero-extended.
|
||||
///
|
||||
/// The legalizer needs to repair the values at all ABI boundaries:
|
||||
///
|
||||
/// - Incoming function arguments to the entry EBB.
|
||||
/// - Function arguments passed to a call.
|
||||
/// - Return values from a call.
|
||||
/// - Return values passed to a return instruction.
|
||||
///
|
||||
/// The `legalize_abi_value` function helps the legalizer with the process. When the legalizer
|
||||
/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value
|
||||
/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type
|
||||
/// for the argument.
|
||||
///
|
||||
/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the
|
||||
/// desired argument type appears. This will happen when a vector or integer type needs to be split
|
||||
/// more than once, for example.
|
||||
pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
|
||||
let have_bits = have.bits();
|
||||
let arg_bits = arg.value_type.bits();
|
||||
|
||||
match have_bits.cmp(&arg_bits) {
|
||||
// We have fewer bits than the ABI argument.
|
||||
Ordering::Less => {
|
||||
debug_assert!(
|
||||
have.is_int() && arg.value_type.is_int(),
|
||||
"Can only extend integer values"
|
||||
);
|
||||
match arg.extension {
|
||||
ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type),
|
||||
ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type),
|
||||
_ => panic!("No argument extension specified"),
|
||||
}
|
||||
}
|
||||
// We have the same number of bits as the argument.
|
||||
Ordering::Equal => {
|
||||
// This must be an integer vector that is split and then extended.
|
||||
debug_assert!(arg.value_type.is_int());
|
||||
debug_assert!(have.is_vector());
|
||||
ValueConversion::VectorSplit
|
||||
}
|
||||
// We have more bits than the argument.
|
||||
Ordering::Greater => {
|
||||
if have.is_vector() {
|
||||
ValueConversion::VectorSplit
|
||||
} else if have.is_float() {
|
||||
// Convert a float to int so it can be split the next time.
|
||||
// ARM would do this to pass an `f64` in two registers.
|
||||
ValueConversion::IntBits
|
||||
} else {
|
||||
ValueConversion::IntSplit
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use ir::AbiParam;
    use ir::types;

    #[test]
    fn legalize() {
        let mut arg = AbiParam::new(types::I32);

        // Wider values are split: vectors as vectors, integers in half.
        assert_eq!(
            legalize_abi_value(types::I64X2, &arg),
            ValueConversion::VectorSplit
        );
        assert_eq!(
            legalize_abi_value(types::I64, &arg),
            ValueConversion::IntSplit
        );

        // Vector of integers is broken down, then sign-extended.
        arg.extension = ArgumentExtension::Sext;
        assert_eq!(
            legalize_abi_value(types::I16X4, &arg),
            ValueConversion::VectorSplit
        );
        assert_eq!(
            legalize_abi_value(types::I16.by(2).unwrap(), &arg),
            ValueConversion::VectorSplit
        );
        assert_eq!(
            legalize_abi_value(types::I16, &arg),
            ValueConversion::Sext(types::I32)
        );

        // 64-bit float is split as an integer.
        assert_eq!(
            legalize_abi_value(types::F64, &arg),
            ValueConversion::IntBits
        );
    }
}
|
||||
920
lib/codegen/src/bforest/map.rs
Normal file
920
lib/codegen/src/bforest/map.rs
Normal file
@@ -0,0 +1,920 @@
|
||||
//! Forest of maps.
|
||||
|
||||
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, INNER_SIZE};
|
||||
use packed_option::PackedOption;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// Tag type defining forest types for a map.
|
||||
struct MapTypes<K, V, C>(PhantomData<(K, V, C)>);
|
||||
|
||||
impl<K, V, C> Forest for MapTypes<K, V, C>
|
||||
where
|
||||
K: Copy,
|
||||
V: Copy,
|
||||
C: Comparator<K>,
|
||||
{
|
||||
type Key = K;
|
||||
type Value = V;
|
||||
type LeafKeys = [K; INNER_SIZE - 1];
|
||||
type LeafValues = [V; INNER_SIZE - 1];
|
||||
type Comparator = C;
|
||||
|
||||
fn splat_key(key: Self::Key) -> Self::LeafKeys {
|
||||
[key; INNER_SIZE - 1]
|
||||
}
|
||||
|
||||
fn splat_value(value: Self::Value) -> Self::LeafValues {
|
||||
[value; INNER_SIZE - 1]
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory pool for a forest of `Map` instances.
|
||||
pub struct MapForest<K, V, C>
|
||||
where
|
||||
K: Copy,
|
||||
V: Copy,
|
||||
C: Comparator<K>,
|
||||
{
|
||||
nodes: NodePool<MapTypes<K, V, C>>,
|
||||
}
|
||||
|
||||
impl<K, V, C> MapForest<K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    /// Create a new empty forest.
    pub fn new() -> MapForest<K, V, C> {
        MapForest { nodes: NodePool::new() }
    }

    /// Clear all maps in the forest.
    ///
    /// All `Map` instances belonging to this forest are invalidated and should no longer be used.
    pub fn clear(&mut self) {
        self.nodes.clear();
    }
}
|
||||
|
||||
/// B-tree mapping from `K` to `V` using `C` for comparing keys.
///
/// This is not a general-purpose replacement for `BTreeMap`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Maps can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a map does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
#[derive(Clone)]
pub struct Map<K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    // Root node in the owning forest's pool; `None` means the map is empty.
    root: PackedOption<Node>,
    // Ties the K/V/C parameters to this type without storing any data.
    unused: PhantomData<(K, V, C)>,
}
|
||||
|
||||
impl<K, V, C> Map<K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    /// Make an empty map.
    pub fn new() -> Map<K, V, C> {
        Map {
            root: None.into(),
            unused: PhantomData,
        }
    }

    /// Is this an empty map?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Get the value stored for `key`.
    pub fn get(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> Option<V> {
        // A throwaway path is fine here; we only need the lookup result.
        self.root.expand().and_then(|root| {
            Path::default().find(key, root, &forest.nodes, comp)
        })
    }

    /// Look up the value stored for `key`.
    ///
    /// If it exists, return the stored key-value pair.
    ///
    /// Otherwise, return the last key-value pair with a key that is less than or equal to `key`.
    ///
    /// If no stored keys are less than or equal to `key`, return `None`.
    pub fn get_or_less(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> Option<(K, V)> {
        self.root.expand().and_then(|root| {
            let mut path = Path::default();
            match path.find(key, root, &forest.nodes, comp) {
                // Exact hit: return the key we were asked about.
                Some(v) => Some((key, v)),
                // Miss: the path points past `key`; step back to the predecessor entry.
                None => path.prev(root, &forest.nodes),
            }
        })
    }

    /// Insert `key, value` into the map and return the old value stored for `key`, if any.
    pub fn insert(
        &mut self,
        key: K,
        value: V,
        forest: &mut MapForest<K, V, C>,
        comp: &C,
    ) -> Option<V> {
        // Delegate to the cursor, which handles node splitting and root updates.
        self.cursor(forest, comp).insert(key, value)
    }

    /// Remove `key` from the map and return the removed value for `key`, if any.
    pub fn remove(&mut self, key: K, forest: &mut MapForest<K, V, C>, comp: &C) -> Option<V> {
        let mut c = self.cursor(forest, comp);
        if c.goto(key).is_some() {
            c.remove()
        } else {
            None
        }
    }

    /// Remove all entries.
    pub fn clear(&mut self, forest: &mut MapForest<K, V, C>) {
        // `take()` leaves `root` as `None`, making the map empty; the nodes return to the pool.
        if let Some(root) = self.root.take() {
            forest.nodes.free_tree(root);
        }
    }

    /// Retains only the elements specified by the predicate.
    ///
    /// Remove all key-value pairs where the predicate returns false.
    ///
    /// The predicate is allowed to update the values stored in the map.
    pub fn retain<F>(&mut self, forest: &mut MapForest<K, V, C>, mut predicate: F)
    where
        F: FnMut(K, &mut V) -> bool,
    {
        let mut path = Path::default();
        if let Some(root) = self.root.expand() {
            path.first(root, &forest.nodes);
        }
        while let Some((node, entry)) = path.leaf_pos() {
            let keep = {
                let (ks, vs) = forest.nodes[node].unwrap_leaf_mut();
                predicate(ks[entry], &mut vs[entry])
            };
            if keep {
                path.next(&forest.nodes);
            } else {
                // Removing may rebalance and change the root; `remove` already advances the path.
                self.root = path.remove(&mut forest.nodes).into();
            }
        }
    }

    /// Create a cursor for navigating this map. The cursor is initially positioned off the end of
    /// the map.
    pub fn cursor<'a>(
        &'a mut self,
        forest: &'a mut MapForest<K, V, C>,
        comp: &'a C,
    ) -> MapCursor<'a, K, V, C> {
        MapCursor::new(self, forest, comp)
    }

    /// Create an iterator traversing this map. The iterator type is `(K, V)`.
    pub fn iter<'a>(&'a self, forest: &'a MapForest<K, V, C>) -> MapIter<'a, K, V, C> {
        MapIter {
            root: self.root,
            pool: &forest.nodes,
            path: Path::default(),
        }
    }
}
|
||||
|
||||
impl<K, V, C> Default for Map<K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    /// The default map is empty, same as `Map::new()`.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[cfg(test)]
impl<K, V, C> Map<K, V, C>
where
    K: Copy + ::std::fmt::Display,
    V: Copy,
    C: Comparator<K>,
{
    /// Verify consistency.
    fn verify(&self, forest: &MapForest<K, V, C>, comp: &C)
    where
        NodeData<MapTypes<K, V, C>>: ::std::fmt::Display,
    {
        if let Some(root) = self.root.expand() {
            forest.nodes.verify_tree(root, comp);
        }
    }

    /// Get a text version of the path to `key`.
    // Used by tests to assert on the tree's internal geometry.
    fn tpath(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> ::std::string::String {
        use std::string::ToString;
        match self.root.expand() {
            None => "map(empty)".to_string(),
            Some(root) => {
                let mut path = Path::default();
                path.find(key, root, &forest.nodes, comp);
                path.to_string()
            }
        }
    }
}
|
||||
|
||||
/// A position in a `Map` used to navigate and modify the ordered map.
///
/// A cursor always points at a key-value pair in the map, or "off the end" which is a position
/// after the last entry in the map.
pub struct MapCursor<'a, K, V, C>
where
    K: 'a + Copy,
    V: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // Mutable borrow of the map's root so insert/remove can update it in place.
    root: &'a mut PackedOption<Node>,
    pool: &'a mut NodePool<MapTypes<K, V, C>>,
    comp: &'a C,
    // Current position in the tree; empty path means "off the end".
    path: Path<MapTypes<K, V, C>>,
}
|
||||
|
||||
impl<'a, K, V, C> MapCursor<'a, K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    /// Create a cursor with a default (off-the-end) location.
    fn new(
        container: &'a mut Map<K, V, C>,
        forest: &'a mut MapForest<K, V, C>,
        comp: &'a C,
    ) -> MapCursor<'a, K, V, C> {
        MapCursor {
            root: &mut container.root,
            pool: &mut forest.nodes,
            comp,
            path: Path::default(),
        }
    }

    /// Is this cursor pointing to an empty map?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Move cursor to the next key-value pair and return it.
    ///
    /// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
    /// position.
    pub fn next(&mut self) -> Option<(K, V)> {
        self.path.next(self.pool)
    }

    /// Move cursor to the previous key-value pair and return it.
    ///
    /// If the cursor is already pointing at the first entry, leave it there and return `None`.
    pub fn prev(&mut self) -> Option<(K, V)> {
        self.root.expand().and_then(
            |root| self.path.prev(root, self.pool),
        )
    }

    /// Get the current key, or `None` if the cursor is at the end.
    pub fn key(&self) -> Option<K> {
        // `get(entry)` handles the off-the-end position, where `entry` is past the leaf's keys.
        self.path.leaf_pos().and_then(|(node, entry)| {
            self.pool[node].unwrap_leaf().0.get(entry).cloned()
        })
    }

    /// Get the current value, or `None` if the cursor is at the end.
    pub fn value(&self) -> Option<V> {
        self.path.leaf_pos().and_then(|(node, entry)| {
            self.pool[node].unwrap_leaf().1.get(entry).cloned()
        })
    }

    /// Get a mutable reference to the current value, or `None` if the cursor is at the end.
    pub fn value_mut(&mut self) -> Option<&mut V> {
        self.path.leaf_pos().and_then(move |(node, entry)| {
            self.pool[node].unwrap_leaf_mut().1.get_mut(entry)
        })
    }

    /// Move this cursor to `key`.
    ///
    /// If `key` is in the map, place the cursor at `key` and return the corresponding value.
    ///
    /// If `key` is not in the set, place the cursor at the next larger element (or the end) and
    /// return `None`.
    pub fn goto(&mut self, elem: K) -> Option<V> {
        self.root.expand().and_then(|root| {
            let v = self.path.find(elem, root, self.pool, self.comp);
            if v.is_none() {
                // Missed lookups can leave the path in an off-the-edge position; fix it up.
                self.path.normalize(self.pool);
            }
            v
        })
    }

    /// Move this cursor to the first element.
    pub fn goto_first(&mut self) -> Option<V> {
        self.root.map(|root| self.path.first(root, self.pool).1)
    }

    /// Insert `(key, value)` into the map and leave the cursor at the inserted pair.
    ///
    /// If the map did not contain `key`, return `None`.
    ///
    /// If `key` is already present, replace the existing with `value` and return the old value.
    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
        match self.root.expand() {
            // Empty map: the new entry becomes a single leaf root.
            None => {
                let root = self.pool.alloc_node(NodeData::leaf(key, value));
                *self.root = root.into();
                self.path.set_root_node(root);
                None
            }
            Some(root) => {
                // TODO: Optimize the case where `self.path` is already at the correct insert pos.
                let old = self.path.find(key, root, self.pool, self.comp);
                if old.is_some() {
                    // Key exists: replace the value in place, no structural change.
                    *self.path.value_mut(self.pool) = value;
                } else {
                    // New key: insertion may split nodes and produce a new root.
                    *self.root = self.path.insert(key, value, self.pool).into();
                }
                old
            }
        }
    }

    /// Remove the current entry (if any) and return the mapped value.
    /// This advances the cursor to the next entry after the removed one.
    pub fn remove(&mut self) -> Option<V> {
        let value = self.value();
        if value.is_some() {
            // Removal may rebalance the tree and change (or clear) the root.
            *self.root = self.path.remove(self.pool).into();
        }
        value
    }
}
|
||||
|
||||
/// An iterator visiting the key-value pairs of a `Map`.
pub struct MapIter<'a, K, V, C>
where
    K: 'a + Copy,
    V: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // `Some` until the first element has been yielded; see `Iterator::next` below.
    root: PackedOption<Node>,
    pool: &'a NodePool<MapTypes<K, V, C>>,
    path: Path<MapTypes<K, V, C>>,
}
|
||||
|
||||
impl<'a, K, V, C> Iterator for MapIter<'a, K, V, C>
|
||||
where
|
||||
K: 'a + Copy,
|
||||
V: 'a + Copy,
|
||||
C: 'a + Comparator<K>,
|
||||
{
|
||||
type Item = (K, V);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// We use `self.root` to indicate if we need to go to the first element. Reset to `None`
|
||||
// once we've returned the first element. This also works for an empty tree since the
|
||||
// `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
|
||||
match self.root.take() {
|
||||
Some(root) => Some(self.path.first(root, self.pool)),
|
||||
None => self.path.next(self.pool),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl<'a, K, V, C> MapCursor<'a, K, V, C>
where
    K: Copy + ::std::fmt::Display,
    V: Copy + ::std::fmt::Display,
    C: Comparator<K>,
{
    // Check both the cursor's path and the whole tree for structural consistency.
    fn verify(&self) {
        self.path.verify(self.pool);
        self.root.map(|root| self.pool.verify_tree(root, self.comp));
    }

    /// Get a text version of the path to the current position.
    fn tpath(&self) -> ::std::string::String {
        use std::string::ToString;
        self.path.to_string()
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::NodeData;
|
||||
use super::*;
|
||||
use std::mem;
|
||||
use std::vec::Vec;
|
||||
|
||||
#[test]
|
||||
fn node_size() {
|
||||
// check that nodes are cache line sized when keys and values are 32 bits.
|
||||
type F = MapTypes<u32, u32, ()>;
|
||||
assert_eq!(mem::size_of::<NodeData<F>>(), 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty() {
|
||||
let mut f = MapForest::<u32, f32, ()>::new();
|
||||
f.clear();
|
||||
|
||||
let mut m = Map::<u32, f32, ()>::new();
|
||||
assert!(m.is_empty());
|
||||
m.clear(&mut f);
|
||||
|
||||
assert_eq!(m.get(7, &f, &()), None);
|
||||
assert_eq!(m.iter(&f).next(), None);
|
||||
assert_eq!(m.get_or_less(7, &f, &()), None);
|
||||
m.retain(&mut f, |_, _| unreachable!());
|
||||
|
||||
let mut c = m.cursor(&mut f, &());
|
||||
assert!(c.is_empty());
|
||||
assert_eq!(c.key(), None);
|
||||
assert_eq!(c.value(), None);
|
||||
assert_eq!(c.next(), None);
|
||||
assert_eq!(c.prev(), None);
|
||||
c.verify();
|
||||
assert_eq!(c.tpath(), "<empty path>");
|
||||
assert_eq!(c.goto_first(), None);
|
||||
assert_eq!(c.tpath(), "<empty path>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inserting() {
|
||||
let f = &mut MapForest::<u32, f32, ()>::new();
|
||||
let mut m = Map::<u32, f32, ()>::new();
|
||||
|
||||
// The first seven values stay in a single leaf node.
|
||||
assert_eq!(m.insert(50, 5.0, f, &()), None);
|
||||
assert_eq!(m.insert(50, 5.5, f, &()), Some(5.0));
|
||||
assert_eq!(m.insert(20, 2.0, f, &()), None);
|
||||
assert_eq!(m.insert(80, 8.0, f, &()), None);
|
||||
assert_eq!(m.insert(40, 4.0, f, &()), None);
|
||||
assert_eq!(m.insert(60, 6.0, f, &()), None);
|
||||
assert_eq!(m.insert(90, 9.0, f, &()), None);
|
||||
assert_eq!(m.insert(200, 20.0, f, &()), None);
|
||||
|
||||
m.verify(f, &());
|
||||
|
||||
assert_eq!(
|
||||
m.iter(f).collect::<Vec<_>>(),
|
||||
[
|
||||
(20, 2.0),
|
||||
(40, 4.0),
|
||||
(50, 5.5),
|
||||
(60, 6.0),
|
||||
(80, 8.0),
|
||||
(90, 9.0),
|
||||
(200, 20.0),
|
||||
]
|
||||
);
|
||||
|
||||
assert_eq!(m.get(0, f, &()), None);
|
||||
assert_eq!(m.get(20, f, &()), Some(2.0));
|
||||
assert_eq!(m.get(30, f, &()), None);
|
||||
assert_eq!(m.get(40, f, &()), Some(4.0));
|
||||
assert_eq!(m.get(50, f, &()), Some(5.5));
|
||||
assert_eq!(m.get(60, f, &()), Some(6.0));
|
||||
assert_eq!(m.get(70, f, &()), None);
|
||||
assert_eq!(m.get(80, f, &()), Some(8.0));
|
||||
assert_eq!(m.get(100, f, &()), None);
|
||||
|
||||
assert_eq!(m.get_or_less(0, f, &()), None);
|
||||
assert_eq!(m.get_or_less(20, f, &()), Some((20, 2.0)));
|
||||
assert_eq!(m.get_or_less(30, f, &()), Some((20, 2.0)));
|
||||
assert_eq!(m.get_or_less(40, f, &()), Some((40, 4.0)));
|
||||
assert_eq!(m.get_or_less(200, f, &()), Some((200, 20.0)));
|
||||
assert_eq!(m.get_or_less(201, f, &()), Some((200, 20.0)));
|
||||
|
||||
{
|
||||
let mut c = m.cursor(f, &());
|
||||
assert_eq!(c.prev(), Some((200, 20.0)));
|
||||
assert_eq!(c.prev(), Some((90, 9.0)));
|
||||
assert_eq!(c.prev(), Some((80, 8.0)));
|
||||
assert_eq!(c.prev(), Some((60, 6.0)));
|
||||
assert_eq!(c.prev(), Some((50, 5.5)));
|
||||
assert_eq!(c.prev(), Some((40, 4.0)));
|
||||
assert_eq!(c.prev(), Some((20, 2.0)));
|
||||
assert_eq!(c.prev(), None);
|
||||
}
|
||||
|
||||
// Test some removals where the node stays healthy.
|
||||
assert_eq!(m.tpath(50, f, &()), "node0[2]");
|
||||
assert_eq!(m.tpath(80, f, &()), "node0[4]");
|
||||
assert_eq!(m.tpath(200, f, &()), "node0[6]");
|
||||
|
||||
assert_eq!(m.remove(80, f, &()), Some(8.0));
|
||||
assert_eq!(m.tpath(50, f, &()), "node0[2]");
|
||||
assert_eq!(m.tpath(80, f, &()), "node0[4]");
|
||||
assert_eq!(m.tpath(200, f, &()), "node0[5]");
|
||||
assert_eq!(m.remove(80, f, &()), None);
|
||||
m.verify(f, &());
|
||||
|
||||
assert_eq!(m.remove(20, f, &()), Some(2.0));
|
||||
assert_eq!(m.tpath(50, f, &()), "node0[1]");
|
||||
assert_eq!(m.tpath(80, f, &()), "node0[3]");
|
||||
assert_eq!(m.tpath(200, f, &()), "node0[4]");
|
||||
assert_eq!(m.remove(20, f, &()), None);
|
||||
m.verify(f, &());
|
||||
|
||||
// [ 40 50 60 90 200 ]
|
||||
|
||||
{
|
||||
let mut c = m.cursor(f, &());
|
||||
assert_eq!(c.goto_first(), Some(4.0));
|
||||
assert_eq!(c.key(), Some(40));
|
||||
assert_eq!(c.value(), Some(4.0));
|
||||
assert_eq!(c.next(), Some((50, 5.5)));
|
||||
assert_eq!(c.next(), Some((60, 6.0)));
|
||||
assert_eq!(c.next(), Some((90, 9.0)));
|
||||
assert_eq!(c.next(), Some((200, 20.0)));
|
||||
c.verify();
|
||||
assert_eq!(c.next(), None);
|
||||
c.verify();
|
||||
}
|
||||
|
||||
// Removals from the root leaf node beyond underflow.
|
||||
assert_eq!(m.remove(200, f, &()), Some(20.0));
|
||||
assert_eq!(m.remove(40, f, &()), Some(4.0));
|
||||
assert_eq!(m.remove(60, f, &()), Some(6.0));
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.remove(50, f, &()), Some(5.5));
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.remove(90, f, &()), Some(9.0));
|
||||
m.verify(f, &());
|
||||
assert!(m.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_level0_leaf() {
|
||||
// Various ways of splitting a full leaf node at level 0.
|
||||
let f = &mut MapForest::<u32, f32, ()>::new();
|
||||
|
||||
fn full_leaf(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
|
||||
let mut m = Map::new();
|
||||
for n in 1..8 {
|
||||
m.insert(n * 10, n as f32 * 1.1, f, &());
|
||||
}
|
||||
m
|
||||
}
|
||||
|
||||
// Insert at front of leaf.
|
||||
let mut m = full_leaf(f);
|
||||
m.insert(5, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(5, f, &()), Some(4.2));
|
||||
|
||||
// Retain even entries, with altered values.
|
||||
m.retain(f, |k, v| {
|
||||
*v = (k / 10) as f32;
|
||||
(k % 20) == 0
|
||||
});
|
||||
assert_eq!(
|
||||
m.iter(f).collect::<Vec<_>>(),
|
||||
[(20, 2.0), (40, 4.0), (60, 6.0)]
|
||||
);
|
||||
|
||||
// Insert at back of leaf.
|
||||
let mut m = full_leaf(f);
|
||||
m.insert(80, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(80, f, &()), Some(4.2));
|
||||
|
||||
// Insert before middle (40).
|
||||
let mut m = full_leaf(f);
|
||||
m.insert(35, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(35, f, &()), Some(4.2));
|
||||
|
||||
// Insert after middle (40).
|
||||
let mut m = full_leaf(f);
|
||||
m.insert(45, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(45, f, &()), Some(4.2));
|
||||
|
||||
m.clear(f);
|
||||
assert!(m.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_level1_leaf() {
|
||||
// Various ways of splitting a full leaf node at level 1.
|
||||
let f = &mut MapForest::<u32, f32, ()>::new();
|
||||
|
||||
// Return a map whose root node is a full inner node, and the leaf nodes are all full
|
||||
// containing:
|
||||
//
|
||||
// 110, 120, ..., 170
|
||||
// 210, 220, ..., 270
|
||||
// ...
|
||||
// 810, 820, ..., 870
|
||||
fn full(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
|
||||
let mut m = Map::new();
|
||||
|
||||
// Start by inserting elements in order.
|
||||
// This should leave 8 leaf nodes with 4 elements in each.
|
||||
for row in 1..9 {
|
||||
for col in 1..5 {
|
||||
m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &());
|
||||
}
|
||||
}
|
||||
|
||||
// Then top up the leaf nodes without splitting them.
|
||||
for row in 1..9 {
|
||||
for col in 5..8 {
|
||||
m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &());
|
||||
}
|
||||
}
|
||||
|
||||
m
|
||||
}
|
||||
|
||||
let mut m = full(f);
|
||||
// Verify geometry. Get get node2 as the root and leaves node0, 1, 3, ...
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.tpath(110, f, &()), "node2[0]--node0[0]");
|
||||
assert_eq!(m.tpath(140, f, &()), "node2[0]--node0[3]");
|
||||
assert_eq!(m.tpath(210, f, &()), "node2[1]--node1[0]");
|
||||
assert_eq!(m.tpath(270, f, &()), "node2[1]--node1[6]");
|
||||
assert_eq!(m.tpath(310, f, &()), "node2[2]--node3[0]");
|
||||
assert_eq!(m.tpath(810, f, &()), "node2[7]--node8[0]");
|
||||
assert_eq!(m.tpath(870, f, &()), "node2[7]--node8[6]");
|
||||
|
||||
{
|
||||
let mut c = m.cursor(f, &());
|
||||
assert_eq!(c.goto_first(), Some(1.1));
|
||||
assert_eq!(c.key(), Some(110));
|
||||
}
|
||||
|
||||
// Front of first leaf.
|
||||
m.insert(0, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(0, f, &()), Some(4.2));
|
||||
|
||||
// First leaf split 4-4 after appending to LHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(135, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(135, f, &()), Some(4.2));
|
||||
|
||||
// First leaf split 4-4 after prepending to RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(145, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(145, f, &()), Some(4.2));
|
||||
|
||||
// First leaf split 4-4 after appending to RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(175, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(175, f, &()), Some(4.2));
|
||||
|
||||
// Left-middle leaf split, ins LHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(435, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(435, f, &()), Some(4.2));
|
||||
|
||||
// Left-middle leaf split, ins RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(445, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(445, f, &()), Some(4.2));
|
||||
|
||||
// Right-middle leaf split, ins LHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(535, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(535, f, &()), Some(4.2));
|
||||
|
||||
// Right-middle leaf split, ins RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(545, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(545, f, &()), Some(4.2));
|
||||
|
||||
// Last leaf split, ins LHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(835, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(835, f, &()), Some(4.2));
|
||||
|
||||
// Last leaf split, ins RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(845, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(845, f, &()), Some(4.2));
|
||||
|
||||
// Front of last leaf.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(805, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(805, f, &()), Some(4.2));
|
||||
|
||||
m.clear(f);
|
||||
m.verify(f, &());
|
||||
}
|
||||
|
||||
// Make a tree with two barely healthy leaf nodes:
|
||||
// [ 10 20 30 40 ] [ 50 60 70 80 ]
|
||||
fn two_leaf(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
    f.clear();
    // Eight evenly spaced keys (10..=80) fill exactly two leaf nodes at minimum health.
    (1..9).fold(Map::new(), |mut map, n| {
        map.insert(n * 10, n as f32, f, &());
        map
    })
}
|
||||
|
||||
#[test]
fn remove_level1() {
    let f = &mut MapForest::<u32, f32, ()>::new();
    // Starting shape (from `two_leaf`): [ 10 20 30 40 ] [ 50 60 70 80 ]
    let mut m = two_leaf(f);

    // Verify geometry.
    m.verify(f, &());
    assert_eq!(m.tpath(10, f, &()), "node2[0]--node0[0]");
    assert_eq!(m.tpath(40, f, &()), "node2[0]--node0[3]");
    assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(50, f, &()), "node2[1]--node1[0]");
    assert_eq!(m.tpath(80, f, &()), "node2[1]--node1[3]");

    // Remove the front entry from a node that stays healthy.
    assert_eq!(m.insert(55, 5.5, f, &()), None);
    assert_eq!(m.remove(50, f, &()), Some(5.0));
    m.verify(f, &());
    assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(55, f, &()), "node2[1]--node1[0]");

    // Remove the front entry from the first leaf node: No critical key to update.
    assert_eq!(m.insert(15, 1.5, f, &()), None);
    assert_eq!(m.remove(10, f, &()), Some(1.0));
    m.verify(f, &());

    // [ 15 20 30 40 ] [ 55 60 70 80 ]

    // Remove the front entry from a right-most node that underflows.
    // No rebalancing for the right-most node. Still need critical key update.
    assert_eq!(m.remove(55, f, &()), Some(5.5));
    m.verify(f, &());
    assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]");

    // [ 15 20 30 40 ] [ 60 70 80 ]

    // Replenish the right leaf.
    assert_eq!(m.insert(90, 9.0, f, &()), None);
    assert_eq!(m.insert(100, 10.0, f, &()), None);
    m.verify(f, &());
    assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]");

    // [ 15 20 30 40 ] [ 60 70 80 90 100 ]

    // Removing one entry from the left leaf should trigger a rebalancing from the right
    // sibling.
    assert_eq!(m.remove(20, f, &()), Some(2.0));
    m.verify(f, &());

    // [ 15 30 40 60 ] [ 70 80 90 100 ]
    // Check that the critical key was updated correctly.
    assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[3]");
    assert_eq!(m.tpath(60, f, &()), "node2[0]--node0[3]");
    assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]");

    // Remove front entry from the left-most leaf node, underflowing.
    // This should cause two leaf nodes to be merged and the root node to go away.
    assert_eq!(m.remove(15, f, &()), Some(1.5));
    m.verify(f, &());
}
|
||||
|
||||
#[test]
fn remove_level1_rightmost() {
    let f = &mut MapForest::<u32, f32, ()>::new();
    let mut m = two_leaf(f);

    // [ 10 20 30 40 ] [ 50 60 70 80 ]

    // Remove entries from the right leaf. This doesn't trigger a rebalancing.
    // (The right-most leaf is allowed to stay below half capacity.)
    assert_eq!(m.remove(60, f, &()), Some(6.0));
    assert_eq!(m.remove(80, f, &()), Some(8.0));
    assert_eq!(m.remove(50, f, &()), Some(5.0));
    m.verify(f, &());

    // [ 10 20 30 40 ] [ 70 ]
    assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]");

    // Removing the last entry from the right leaf should cause a collapse.
    assert_eq!(m.remove(70, f, &()), Some(7.0));
    m.verify(f, &());
}
|
||||
|
||||
// Make a 3-level tree with barely healthy nodes.
|
||||
// 1 root, 8 inner nodes, 7*4+5=33 leaf nodes, 4 entries each.
|
||||
fn level3_sparse(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
    f.clear();
    // 132 entries at keys 10, 20, ..., 1320 produce the 3-level shape described above.
    let mut m = Map::new();
    let mut n = 1u32;
    while n < 133 {
        m.insert(n * 10, n as f32, f, &());
        n += 1;
    }
    m
}
|
||||
|
||||
#[test]
fn level3_removes() {
    let f = &mut MapForest::<u32, f32, ()>::new();
    let mut m = level3_sparse(f);
    m.verify(f, &());

    // Check geometry.
    // Root: node11
    // [ node2 170 node10 330 node16 490 node21 650 node26 810 node31 970 node36 1130 node41 ]
    // L1: node11
    assert_eq!(m.tpath(0, f, &()), "node11[0]--node2[0]--node0[0]");
    assert_eq!(m.tpath(10000, f, &()), "node11[7]--node41[4]--node40[4]");

    // 650 is a critical key in the middle of the root.
    assert_eq!(m.tpath(640, f, &()), "node11[3]--node21[3]--node19[3]");
    assert_eq!(m.tpath(650, f, &()), "node11[4]--node26[0]--node20[0]");

    // Deleting 640 triggers a rebalance from node19 to node 20, cascading to n21 -> n26.
    assert_eq!(m.remove(640, f, &()), Some(64.0));
    m.verify(f, &());
    assert_eq!(m.tpath(650, f, &()), "node11[3]--node26[3]--node20[3]");

    // 1130 is in the first leaf of the last L1 node. Deleting it triggers a rebalance node35
    // -> node37, but no rebalance above where there is no right sibling.
    assert_eq!(m.tpath(1130, f, &()), "node11[6]--node41[0]--node35[0]");
    assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node35[1]");
    assert_eq!(m.remove(1130, f, &()), Some(113.0));
    m.verify(f, &());
    assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node37[0]");
}
|
||||
|
||||
#[test]
fn insert_many() {
    let f = &mut MapForest::<u32, f32, ()>::new();
    let mut m = Map::<u32, f32, ()>::new();

    let mm = 4096;
    let mut x = 0;

    // Insert all keys 0..mm in a scrambled but deterministic order: `x` walks the
    // triangular numbers mod `mm`. The `None` asserts confirm at runtime that every
    // key is visited exactly once.
    for n in 0..mm {
        assert_eq!(m.insert(x, n as f32, f, &()), None);
        m.verify(f, &());

        x = (x + n + 1) % mm;
    }

    // Replay the same key sequence to check every stored value.
    x = 0;
    for n in 0..mm {
        assert_eq!(m.get(x, f, &()), Some(n as f32));
        x = (x + n + 1) % mm;
    }

    // Remove everything in the same order, verifying tree invariants after each step.
    x = 0;
    for n in 0..mm {
        assert_eq!(m.remove(x, f, &()), Some(n as f32));
        m.verify(f, &());

        x = (x + n + 1) % mm;
    }

    assert!(m.is_empty());
}
|
||||
}
|
||||
172
lib/codegen/src/bforest/mod.rs
Normal file
172
lib/codegen/src/bforest/mod.rs
Normal file
@@ -0,0 +1,172 @@
|
||||
//! A forest of B+-trees.
|
||||
//!
|
||||
//! This module provides a data structures representing a set of small ordered sets or maps.
|
||||
//! It is implemented as a forest of B+-trees all allocating nodes out of the same pool.
|
||||
//!
|
||||
//! **These are not general purpose data structures that are somehow magically faster that the
|
||||
//! standard library's `BTreeSet` and `BTreeMap` types.**
|
||||
//!
|
||||
//! The tradeoffs are different:
|
||||
//!
|
||||
//! - Keys and values are expected to be small and copyable. We optimize for 32-bit types.
|
||||
//! - A comparator object is used to compare keys, allowing smaller "context free" keys.
|
||||
//! - Empty trees have a very small 32-bit footprint.
|
||||
//! - All the trees in a forest can be cleared in constant time.
|
||||
|
||||
use std::borrow::BorrowMut;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
mod map;
|
||||
mod node;
|
||||
mod path;
|
||||
mod pool;
|
||||
mod set;
|
||||
|
||||
pub use self::map::{Map, MapCursor, MapForest, MapIter};
|
||||
pub use self::set::{Set, SetCursor, SetForest, SetIter};
|
||||
|
||||
use self::node::NodeData;
|
||||
use self::path::Path;
|
||||
use self::pool::NodePool;
|
||||
|
||||
/// The maximum branching factor of an inner node in a B+-tree.
|
||||
/// The minimum number of outgoing edges is `INNER_SIZE/2`.
|
||||
const INNER_SIZE: usize = 8;
|
||||
|
||||
/// Given the worst case branching factor of `INNER_SIZE/2` = 4, this is the
|
||||
/// worst case path length from the root node to a leaf node in a tree with 2^32
|
||||
/// entries. We would run out of node references before we hit `MAX_PATH`.
|
||||
const MAX_PATH: usize = 16;
|
||||
|
||||
/// Key comparator.
///
/// Keys don't need to implement `Ord`. They are compared using a comparator object which
/// provides a context for comparison.
pub trait Comparator<K>
where
    K: Copy,
{
    /// Compare keys `a` and `b`.
    ///
    /// This relation must provide a total ordering of the key space.
    fn cmp(&self, a: K, b: K) -> Ordering;

    /// Binary search for `k` in an ordered slice.
    ///
    /// Assume that `s` is already sorted according to this ordering, search for the key `k`.
    ///
    /// Returns `Ok(idx)` if `k` was found in the slice or `Err(idx)` with the position where it
    /// should be inserted to preserve the ordering.
    fn search(&self, k: K, s: &[K]) -> Result<usize, usize> {
        // Delegate to the standard library, but order elements with `self.cmp`.
        s.binary_search_by(|x| self.cmp(*x, k))
    }
}
|
||||
|
||||
/// Trivial comparator that doesn't actually provide any context.
|
||||
impl<K> Comparator<K> for ()
|
||||
where
|
||||
K: Copy + Ord,
|
||||
{
|
||||
fn cmp(&self, a: K, b: K) -> Ordering {
|
||||
a.cmp(&b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Family of types shared by the map and set forest implementations.
|
||||
trait Forest {
|
||||
/// The key type is present for both sets and maps.
|
||||
type Key: Copy;
|
||||
|
||||
/// The value type is `()` for sets.
|
||||
type Value: Copy;
|
||||
|
||||
/// An array of keys for the leaf nodes.
|
||||
type LeafKeys: Copy + BorrowMut<[Self::Key]>;
|
||||
|
||||
/// An array of values for the leaf nodes.
|
||||
type LeafValues: Copy + BorrowMut<[Self::Value]>;
|
||||
|
||||
/// Type used for key comparisons.
|
||||
type Comparator: Comparator<Self::Key>;
|
||||
|
||||
/// Splat a single key into a whole array.
|
||||
fn splat_key(key: Self::Key) -> Self::LeafKeys;
|
||||
|
||||
/// Splat a single value inst a whole array
|
||||
fn splat_value(value: Self::Value) -> Self::LeafValues;
|
||||
}
|
||||
|
||||
/// A reference to a B+-tree node.
///
/// A 32-bit index; presumably into the forest's shared `NodePool` — confirm at the pool's
/// definition. The `entity_impl!` macro provides the usual entity plumbing (formatted as
/// `nodeN` in the tests).
#[derive(Clone, Copy, PartialEq, Eq)]
struct Node(u32);
entity_impl!(Node, "node");
|
||||
|
||||
/// Empty type to be used as the "value" in B-trees representing sets.
///
/// Zero-sized, so leaf value arrays of `SetValue` occupy no space.
#[derive(Clone, Copy)]
struct SetValue();
|
||||
|
||||
/// Insert `x` into `s` at position `i`, pushing out the last element.
///
/// Everything from `s[i]` onward moves one slot to the right; the final element is
/// discarded. Panics if `i >= s.len()` (same as indexing would).
fn slice_insert<T: Copy>(s: &mut [T], i: usize, x: T) {
    // Ripple the incoming value through the tail: each slot takes the carried value
    // and hands its old contents on. The last evicted element is simply dropped.
    let mut carry = x;
    for slot in s[i..].iter_mut() {
        let evicted = *slot;
        *slot = carry;
        carry = evicted;
    }
}
|
||||
|
||||
/// Shift elements in `s` to the left by `n` positions.
///
/// The last `n` slots keep their old contents. Panics if `n > s.len()`.
fn slice_shift<T: Copy>(s: &mut [T], n: usize) {
    // Number of elements that survive the shift; underflows (panics) when n > len,
    // matching the original contract.
    let keep = s.len() - n;
    for dst in 0..keep {
        s[dst] = s[dst + n];
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use entity::EntityRef;
    use ir::Ebb;

    // The unit comparator must defer to the key's `Ord`, and `search` must report
    // both hits (`Ok`) and insertion points (`Err`).
    #[test]
    fn comparator() {
        let ebb1 = Ebb::new(1);
        let ebb2 = Ebb::new(2);
        let ebb3 = Ebb::new(3);
        let ebb4 = Ebb::new(4);
        let vals = [ebb1, ebb2, ebb4];
        let comp = ();
        assert_eq!(comp.search(ebb1, &vals), Ok(0));
        assert_eq!(comp.search(ebb3, &vals), Err(2));
        assert_eq!(comp.search(ebb4, &vals), Ok(2));
    }

    // `slice_insert` shifts right within the given sub-slice only, evicting its last element.
    #[test]
    fn slice_insertion() {
        let mut a = ['a', 'b', 'c', 'd'];

        slice_insert(&mut a[0..1], 0, 'e');
        assert_eq!(a, ['e', 'b', 'c', 'd']);

        slice_insert(&mut a, 0, 'a');
        assert_eq!(a, ['a', 'e', 'b', 'c']);

        slice_insert(&mut a, 3, 'g');
        assert_eq!(a, ['a', 'e', 'b', 'g']);

        slice_insert(&mut a, 1, 'h');
        assert_eq!(a, ['a', 'h', 'e', 'b']);
    }

    // `slice_shift` moves elements left; the vacated tail keeps its old contents.
    #[test]
    fn slice_shifting() {
        let mut a = ['a', 'b', 'c', 'd'];

        slice_shift(&mut a[0..1], 1);
        assert_eq!(a, ['a', 'b', 'c', 'd']);

        slice_shift(&mut a[1..], 1);
        assert_eq!(a, ['a', 'c', 'd', 'd']);

        slice_shift(&mut a, 2);
        assert_eq!(a, ['d', 'd', 'd', 'd']);
    }
}
|
||||
814
lib/codegen/src/bforest/node.rs
Normal file
814
lib/codegen/src/bforest/node.rs
Normal file
@@ -0,0 +1,814 @@
|
||||
//! B+-tree nodes.
|
||||
|
||||
use super::{slice_insert, slice_shift, Forest, Node, SetValue, INNER_SIZE};
|
||||
use std::borrow::{Borrow, BorrowMut};
|
||||
use std::fmt;
|
||||
|
||||
/// B+-tree node.
///
/// A B+-tree has different node types for inner nodes and leaf nodes. Inner nodes contain M node
/// references and M-1 keys while leaf nodes contain N keys and values. Values for M and N are
/// chosen such that a node is exactly 64 bytes (a cache line) when keys and values are 32 bits
/// each.
///
/// An inner node contains at least M/2 node references unless it is the right-most node at its
/// level. A leaf node contains at least N/2 keys unless it is the right-most leaf.
pub(super) enum NodeData<F: Forest> {
    Inner {
        /// The number of keys in this node.
        /// The number of node references is always one more.
        size: u8,

        /// Keys discriminating sub-trees.
        ///
        /// The key in `keys[i]` is greater than all keys in `tree[i]` and less than or equal to
        /// all keys in `tree[i+1]`.
        keys: [F::Key; INNER_SIZE - 1],

        /// Sub-trees.
        tree: [Node; INNER_SIZE],
    },
    Leaf {
        /// Number of key-value pairs in this node.
        size: u8,

        /// Key array; only the first `size` entries are meaningful.
        keys: F::LeafKeys,

        /// Value array, parallel to `keys`.
        vals: F::LeafValues,
    },
    /// An unused node on the free list.
    Free { next: Option<Node> },
}
|
||||
|
||||
// Implement `Clone` and `Copy` manually, because deriving them would also require `Forest` to
// implement `Clone`. (The derive would add an unnecessary `F: Copy`/`F: Clone` bound even
// though no `F` value is stored.)
impl<F: Forest> Copy for NodeData<F> {}
impl<F: Forest> Clone for NodeData<F> {
    fn clone(&self) -> Self {
        *self
    }
}
|
||||
|
||||
impl<F: Forest> NodeData<F> {
|
||||
/// Is this a free/unused node?
|
||||
pub fn is_free(&self) -> bool {
|
||||
match *self {
|
||||
NodeData::Free { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of entries in this node.
///
/// This is the number of outgoing edges in an inner node, or the number of key-value pairs in
/// a leaf node.
///
/// Panics if called on a `Free` node.
pub fn entries(&self) -> usize {
    match *self {
        // An inner node's `size` counts keys; sub-tree edges are always one more.
        NodeData::Inner { size, .. } => usize::from(size) + 1,
        NodeData::Leaf { size, .. } => usize::from(size),
        NodeData::Free { .. } => panic!("freed node"),
    }
}
|
||||
|
||||
/// Create an inner node with a single key and two sub-trees.
pub fn inner(left: Node, key: F::Key, right: Node) -> NodeData<F> {
    // Splat the key and right node to the whole array.
    // Saves us from inventing a default/reserved value.
    let mut tree = [right; INNER_SIZE];
    tree[0] = left;
    NodeData::Inner {
        // One key => two live sub-trees; the rest of the arrays is padding.
        size: 1,
        keys: [key; INNER_SIZE - 1],
        tree,
    }
}
|
||||
|
||||
/// Create a leaf node with a single key-value pair.
pub fn leaf(key: F::Key, value: F::Value) -> NodeData<F> {
    NodeData::Leaf {
        size: 1,
        // Splatting fills the whole array so no default/reserved element is needed.
        keys: F::splat_key(key),
        vals: F::splat_value(value),
    }
}
|
||||
|
||||
/// Unwrap an inner node into two slices (keys, trees).
///
/// The returned tree slice is always one element longer than the key slice.
/// Panics if this is not an inner node.
pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) {
    match *self {
        NodeData::Inner {
            size,
            ref keys,
            ref tree,
        } => {
            let size = usize::from(size);
            // TODO: We could probably use `get_unchecked()` here since `size` is always in
            // range.
            (&keys[0..size], &tree[0..size + 1])
        }
        _ => panic!("Expected inner node"),
    }
}
|
||||
|
||||
/// Unwrap a leaf node into two slices (keys, values) of the same length.
///
/// Panics if this is not a leaf node.
pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) {
    match *self {
        NodeData::Leaf {
            size,
            ref keys,
            ref vals,
        } => {
            let size = usize::from(size);
            // Go through `Borrow` to view the opaque array types as slices.
            let keys = keys.borrow();
            let vals = vals.borrow();
            // TODO: We could probably use `get_unchecked()` here since `size` is always in
            // range.
            (&keys[0..size], &vals[0..size])
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Unwrap a mutable leaf node into two slices (keys, values) of the same length.
///
/// Panics if this is not a leaf node.
pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) {
    match *self {
        NodeData::Leaf {
            size,
            ref mut keys,
            ref mut vals,
        } => {
            let size = usize::from(size);
            // Go through `BorrowMut` to view the opaque array types as slices.
            let keys = keys.borrow_mut();
            let vals = vals.borrow_mut();
            // TODO: We could probably use `get_unchecked_mut()` here since `size` is always in
            // range.
            (&mut keys[0..size], &mut vals[0..size])
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Get the critical key for a leaf node.
/// This is simply the first key.
///
/// Panics if this is not a leaf node; in debug builds, also asserts the leaf is non-empty.
pub fn leaf_crit_key(&self) -> F::Key {
    match *self {
        NodeData::Leaf { size, ref keys, .. } => {
            debug_assert!(size > 0, "Empty leaf node");
            keys.borrow()[0]
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Try to insert `(key, node)` at key-position `index` in an inner node.
/// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`.
/// If the node is full, this leaves the node unchanged and returns false.
pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool {
    match *self {
        NodeData::Inner {
            ref mut size,
            ref mut keys,
            ref mut tree,
        } => {
            let sz = usize::from(*size);
            debug_assert!(sz <= keys.len());
            debug_assert!(index <= sz, "Can't insert at {} with {} keys", index, sz);

            // `get_mut` doubles as the capacity check: it returns `None` exactly when
            // the node is already full (sz == keys.len()).
            if let Some(ks) = keys.get_mut(0..sz + 1) {
                *size = (sz + 1) as u8;
                slice_insert(ks, index, key);
                // The new sub-tree goes one position to the right of the new key.
                slice_insert(&mut tree[1..sz + 2], index, node);
                true
            } else {
                false
            }
        }
        _ => panic!("Expected inner node"),
    }
}
|
||||
|
||||
/// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node
/// is full.
pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool {
    match *self {
        NodeData::Leaf {
            ref mut size,
            ref mut keys,
            ref mut vals,
        } => {
            let sz = usize::from(*size);
            let keys = keys.borrow_mut();
            let vals = vals.borrow_mut();
            debug_assert!(sz <= keys.len());
            debug_assert!(index <= sz);

            // `get_mut` doubles as the capacity check: it returns `None` exactly when
            // the leaf is already full (sz == keys.len()).
            if let Some(ks) = keys.get_mut(0..sz + 1) {
                *size = (sz + 1) as u8;
                slice_insert(ks, index, key);
                slice_insert(&mut vals[0..sz + 1], index, value);
                true
            } else {
                false
            }
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Split off the second half of this node.
|
||||
/// It is assumed that this a completely full inner or leaf node.
|
||||
///
|
||||
/// The `insert_index` parameter is the position where an insertion was tried and failed. The
|
||||
/// node will be split in half with a bias towards an even split after the insertion is retried.
|
||||
pub fn split(&mut self, insert_index: usize) -> SplitOff<F> {
|
||||
match *self {
|
||||
NodeData::Inner {
|
||||
ref mut size,
|
||||
ref keys,
|
||||
ref tree,
|
||||
} => {
|
||||
debug_assert_eq!(usize::from(*size), keys.len(), "Node not full");
|
||||
|
||||
// Number of tree entries in the lhs node.
|
||||
let l_ents = split_pos(tree.len(), insert_index + 1);
|
||||
let r_ents = tree.len() - l_ents;
|
||||
|
||||
// With INNER_SIZE=8, we get l_ents=4 and:
|
||||
//
|
||||
// self: [ n0 k0 n1 k1 n2 k2 n3 k3 n4 k4 n5 k5 n6 k6 n7 ]
|
||||
// lhs: [ n0 k0 n1 k1 n2 k2 n3 ]
|
||||
// crit_key = k3 (not present in either node)
|
||||
// rhs: [ n4 k4 n5 k5 n6 k6 n7 ]
|
||||
|
||||
// 1. Truncate the LHS.
|
||||
*size = (l_ents - 1) as u8;
|
||||
|
||||
// 2. Copy second half to `rhs_data`.
|
||||
let mut r_keys = *keys;
|
||||
r_keys[0..r_ents - 1].copy_from_slice(&keys[l_ents..]);
|
||||
|
||||
let mut r_tree = *tree;
|
||||
r_tree[0..r_ents].copy_from_slice(&tree[l_ents..]);
|
||||
|
||||
SplitOff {
|
||||
lhs_entries: l_ents,
|
||||
rhs_entries: r_ents,
|
||||
crit_key: keys[l_ents - 1],
|
||||
rhs_data: NodeData::Inner {
|
||||
size: (r_ents - 1) as u8,
|
||||
keys: r_keys,
|
||||
tree: r_tree,
|
||||
},
|
||||
}
|
||||
}
|
||||
NodeData::Leaf {
|
||||
ref mut size,
|
||||
ref keys,
|
||||
ref vals,
|
||||
} => {
|
||||
let o_keys = keys.borrow();
|
||||
let o_vals = vals.borrow();
|
||||
debug_assert_eq!(usize::from(*size), o_keys.len(), "Node not full");
|
||||
|
||||
let l_size = split_pos(o_keys.len(), insert_index);
|
||||
let r_size = o_keys.len() - l_size;
|
||||
|
||||
// 1. Truncate the LHS node at `l_size`.
|
||||
*size = l_size as u8;
|
||||
|
||||
// 2. Copy second half to `rhs_data`.
|
||||
let mut r_keys = *keys;
|
||||
r_keys.borrow_mut()[0..r_size].copy_from_slice(&o_keys[l_size..]);
|
||||
|
||||
let mut r_vals = *vals;
|
||||
r_vals.borrow_mut()[0..r_size].copy_from_slice(&o_vals[l_size..]);
|
||||
|
||||
SplitOff {
|
||||
lhs_entries: l_size,
|
||||
rhs_entries: r_size,
|
||||
crit_key: o_keys[l_size],
|
||||
rhs_data: NodeData::Leaf {
|
||||
size: r_size as u8,
|
||||
keys: r_keys,
|
||||
vals: r_vals,
|
||||
},
|
||||
}
|
||||
}
|
||||
_ => panic!("Expected leaf node"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove the sub-tree at `index` from this inner node.
///
/// Note that `index` refers to a sub-tree entry and not a key entry as it does for
/// `try_inner_insert()`. It is possible to remove the first sub-tree (which can't be inserted
/// by `try_inner_insert()`).
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn inner_remove(&mut self, index: usize) -> Removed {
    match *self {
        NodeData::Inner {
            ref mut size,
            ref mut keys,
            ref mut tree,
        } => {
            let ents = usize::from(*size) + 1;
            debug_assert!(ents <= tree.len());
            debug_assert!(index < ents);
            // Leave an invalid 0xff size when node becomes empty.
            // (wrapping_sub avoids a debug-build overflow panic for ents == 1.)
            *size = ents.wrapping_sub(2) as u8;
            if ents > 1 {
                // Removing tree[i] deletes key[i-1]; removing tree[0] deletes key[0]
                // instead, hence the saturating_sub.
                slice_shift(&mut keys[index.saturating_sub(1)..ents - 1], 1);
            }
            slice_shift(&mut tree[index..ents], 1);
            Removed::new(index, ents - 1, tree.len())
        }
        _ => panic!("Expected inner node"),
    }
}
|
||||
|
||||
/// Remove the key-value pair at `index` from this leaf node.
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn leaf_remove(&mut self, index: usize) -> Removed {
    match *self {
        NodeData::Leaf {
            ref mut size,
            ref mut keys,
            ref mut vals,
        } => {
            let sz = usize::from(*size);
            let keys = keys.borrow_mut();
            let vals = vals.borrow_mut();
            *size -= 1;
            // Close the gap in both parallel arrays.
            slice_shift(&mut keys[index..sz], 1);
            slice_shift(&mut vals[index..sz], 1);
            Removed::new(index, sz - 1, keys.len())
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Balance this node with its right sibling.
///
/// It is assumed that the current node has underflowed. Look at the right sibling node and do
/// one of two things:
///
/// 1. Move all entries to the right node, leaving this node empty, or
/// 2. Distribute entries evenly between the two nodes.
///
/// In the first case, `None` is returned. In the second case, the new critical key for the
/// right sibling node is returned.
///
/// Panics if `self` and `rhs` are not the same kind of node.
pub fn balance(&mut self, crit_key: F::Key, rhs: &mut NodeData<F>) -> Option<F::Key> {
    match (self, rhs) {
        (&mut NodeData::Inner {
            size: ref mut l_size,
            keys: ref mut l_keys,
            tree: ref mut l_tree,
        },
        &mut NodeData::Inner {
            size: ref mut r_size,
            keys: ref mut r_keys,
            tree: ref mut r_tree,
        }) => {
            // Inner node `size` counts keys; the entry (sub-tree) count is one more.
            let l_ents = usize::from(*l_size) + 1;
            let r_ents = usize::from(*r_size) + 1;
            let ents = l_ents + r_ents;

            if ents <= r_tree.len() {
                // All entries will fit in the RHS node.
                // We'll leave the LHS node empty, but first use it as a scratch space.
                *l_size = 0;
                // Insert `crit_key` between the two nodes.
                l_keys[l_ents - 1] = crit_key;
                l_keys[l_ents..ents - 1].copy_from_slice(&r_keys[0..r_ents - 1]);
                r_keys[0..ents - 1].copy_from_slice(&l_keys[0..ents - 1]);
                l_tree[l_ents..ents].copy_from_slice(&r_tree[0..r_ents]);
                r_tree[0..ents].copy_from_slice(&l_tree[0..ents]);
                *r_size = (ents - 1) as u8;
                None
            } else {
                // The entries don't all fit in one node. Distribute some from RHS -> LHS.
                // Split evenly with a bias to putting one entry in LHS.
                let r_goal = ents / 2;
                let l_goal = ents - r_goal;
                debug_assert!(l_goal > l_ents, "Node must be underflowed");

                // The old critical key becomes a regular key in the merged LHS prefix.
                l_keys[l_ents - 1] = crit_key;
                l_keys[l_ents..l_goal - 1].copy_from_slice(&r_keys[0..l_goal - 1 - l_ents]);
                l_tree[l_ents..l_goal].copy_from_slice(&r_tree[0..l_goal - l_ents]);
                *l_size = (l_goal - 1) as u8;

                // The key just before the RHS's new first sub-tree separates the nodes.
                let new_crit = r_keys[r_ents - r_goal - 1];
                slice_shift(&mut r_keys[0..r_ents - 1], r_ents - r_goal);
                slice_shift(&mut r_tree[0..r_ents], r_ents - r_goal);
                *r_size = (r_goal - 1) as u8;

                Some(new_crit)
            }
        }
        (&mut NodeData::Leaf {
            size: ref mut l_size,
            keys: ref mut l_keys,
            vals: ref mut l_vals,
        },
        &mut NodeData::Leaf {
            size: ref mut r_size,
            keys: ref mut r_keys,
            vals: ref mut r_vals,
        }) => {
            let l_ents = usize::from(*l_size);
            let l_keys = l_keys.borrow_mut();
            let l_vals = l_vals.borrow_mut();
            let r_ents = usize::from(*r_size);
            let r_keys = r_keys.borrow_mut();
            let r_vals = r_vals.borrow_mut();
            let ents = l_ents + r_ents;

            if ents <= r_vals.len() {
                // We can fit all entries in the RHS node.
                // We'll leave the LHS node empty, but first use it as a scratch space.
                *l_size = 0;
                l_keys[l_ents..ents].copy_from_slice(&r_keys[0..r_ents]);
                r_keys[0..ents].copy_from_slice(&l_keys[0..ents]);
                l_vals[l_ents..ents].copy_from_slice(&r_vals[0..r_ents]);
                r_vals[0..ents].copy_from_slice(&l_vals[0..ents]);
                *r_size = ents as u8;
                None
            } else {
                // The entries don't all fit in one node. Distribute some from RHS -> LHS.
                // Split evenly with a bias to putting one entry in LHS.
                let r_goal = ents / 2;
                let l_goal = ents - r_goal;
                debug_assert!(l_goal > l_ents, "Node must be underflowed");

                l_keys[l_ents..l_goal].copy_from_slice(&r_keys[0..l_goal - l_ents]);
                l_vals[l_ents..l_goal].copy_from_slice(&r_vals[0..l_goal - l_ents]);
                *l_size = l_goal as u8;

                slice_shift(&mut r_keys[0..r_ents], r_ents - r_goal);
                slice_shift(&mut r_vals[0..r_ents], r_ents - r_goal);
                *r_size = r_goal as u8;

                // For leaves, the RHS's new first key is the new critical key.
                Some(r_keys[0])
            }
        }
        _ => panic!("Mismatched nodes"),
    }
}
|
||||
}
|
||||
|
||||
/// Find the right split position for halving a full node with `len` entries to recover from a
/// failed insertion at `ins`.
///
/// If `len` is even, we should split straight down the middle regardless of `ins`.
///
/// If `len` is odd, we should split the node such that the two halves are the same size after
/// the insertion is retried.
fn split_pos(len: usize, ins: usize) -> usize {
    // Anticipate `len` being a compile time constant, so this all folds away when `len` is
    // even (both arms are then identical).
    let into_lower_half = ins <= len / 2;
    if into_lower_half {
        len / 2
    } else {
        (len + 1) / 2
    }
}
|
||||
|
||||
/// The result of splitting off the second half of a node.
pub(super) struct SplitOff<F: Forest> {
    /// The number of entries left in the original node which becomes the left-hand-side of the
    /// pair. This is the number of outgoing node edges for an inner node, and the number of
    /// key-value pairs for a leaf node.
    pub lhs_entries: usize,

    /// The number of entries in the new RHS node.
    pub rhs_entries: usize,

    /// The critical key separating the LHS and RHS nodes. All keys in the LHS sub-tree are less
    /// than the critical key, and all entries in the RHS sub-tree are greater or equal to the
    /// critical key.
    ///
    /// For an inner split this key is in neither node; for a leaf split it is the RHS's first
    /// key.
    pub crit_key: F::Key,

    /// The RHS node data containing the elements that were removed from the original node (now
    /// the LHS).
    pub rhs_data: NodeData<F>,
}
|
||||
|
||||
/// The result of removing an entry from a node.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(super) enum Removed {
|
||||
/// An entry was removed, and the node is still in good shape.
|
||||
Healthy,
|
||||
|
||||
/// The node is in good shape after removing the rightmost element.
|
||||
Rightmost,
|
||||
|
||||
/// The node has too few entries now, and it should be balanced with a sibling node.
|
||||
Underflow,
|
||||
|
||||
/// The last entry was removed. For an inner node, this means that the `keys` array is empty
|
||||
/// and there is just a single sub-tree left.
|
||||
Empty,
|
||||
}
|
||||
|
||||
impl Removed {
|
||||
/// Create a `Removed` status from a size and capacity.
|
||||
fn new(removed: usize, new_size: usize, capacity: usize) -> Removed {
|
||||
if 2 * new_size >= capacity {
|
||||
if removed == new_size {
|
||||
Removed::Rightmost
|
||||
} else {
|
||||
Removed::Healthy
|
||||
}
|
||||
} else if new_size > 0 {
|
||||
Removed::Underflow
|
||||
} else {
|
||||
Removed::Empty
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Display ": value" after each key, or nothing at all for the set placeholder `SetValue`.
pub(super) trait ValDisp {
    fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result;
}

// Sets have no values, so print nothing.
impl ValDisp for SetValue {
    fn valfmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
        Ok(())
    }
}

// Any displayable map value is printed as ":{value}".
impl<T: fmt::Display> ValDisp for T {
    fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, ":{}", self)
    }
}
|
||||
|
||||
// Human-readable node dump, used by the tree-printing test helpers:
// inner: `[ node0 key node1 ... ]`, leaf: `[ key:val ... ]`, plus free-list entries.
impl<F> fmt::Display for NodeData<F>
where
    F: Forest,
    F::Key: fmt::Display,
    F::Value: ValDisp,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            NodeData::Inner { size, keys, tree } => {
                // Sub-trees and keys alternate; there is always one leading sub-tree.
                write!(f, "[ {}", tree[0])?;
                for i in 0..usize::from(size) {
                    write!(f, " {} {}", keys[i], tree[i + 1])?;
                }
                write!(f, " ]")
            }
            NodeData::Leaf { size, keys, vals } => {
                let keys = keys.borrow();
                let vals = vals.borrow();
                write!(f, "[")?;
                for i in 0..usize::from(size) {
                    write!(f, " {}", keys[i])?;
                    // `valfmt` prints ":value" for maps and nothing for sets.
                    vals[i].valfmt(f)?;
                }
                write!(f, " ]")
            }
            NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n),
            NodeData::Free { next: None } => write!(f, "[ free ]"),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use std::mem;
    use std::string::ToString;

    // Forest impl for a set implementation.
    // Leaf nodes hold 15 `char` keys and carry no values (`SetValue`).
    struct TF();

    impl Forest for TF {
        type Key = char;
        type Value = SetValue;
        type LeafKeys = [char; 15];
        type LeafValues = [SetValue; 15];
        type Comparator = ();

        fn splat_key(key: Self::Key) -> Self::LeafKeys {
            [key; 15]
        }

        fn splat_value(value: Self::Value) -> Self::LeafValues {
            [value; 15]
        }
    }

    // Exercise insertion, splitting, and removal on an inner node.
    #[test]
    fn inner() {
        let n1 = Node(1);
        let n2 = Node(2);
        let n3 = Node(3);
        let n4 = Node(4);
        let mut inner = NodeData::<TF>::inner(n1, 'c', n4);
        // Sanity-check the node layout size.
        assert_eq!(mem::size_of_val(&inner), 64);
        assert_eq!(inner.to_string(), "[ node1 c node4 ]");

        assert!(inner.try_inner_insert(0, 'a', n2));
        assert_eq!(inner.to_string(), "[ node1 a node2 c node4 ]");

        assert!(inner.try_inner_insert(1, 'b', n3));
        assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");

        // Fill the node up to its 7-key capacity.
        for i in 3..7 {
            assert!(inner.try_inner_insert(
                usize::from(i),
                ('a' as u8 + i) as char,
                Node(i as u32 + 2),
            ));
        }
        assert_eq!(
            inner.to_string(),
            "[ node1 a node2 b node3 c node4 d node5 e node6 f node7 g node8 ]"
        );

        // Now the node is full and insertion should fail anywhere.
        assert!(!inner.try_inner_insert(0, 'x', n3));
        assert!(!inner.try_inner_insert(4, 'x', n3));
        assert!(!inner.try_inner_insert(7, 'x', n3));

        // Splitting should be independent of the hint because we have an even number of node
        // references.
        let saved = inner.clone();
        let sp = inner.split(1);
        assert_eq!(sp.lhs_entries, 4);
        assert_eq!(sp.rhs_entries, 4);
        assert_eq!(sp.crit_key, 'd');
        // The critical key is not present in either of the resulting nodes.
        assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
        assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]");

        // Removing below half capacity reports `Underflow`.
        assert_eq!(inner.inner_remove(0), Removed::Underflow);
        assert_eq!(inner.to_string(), "[ node2 b node3 c node4 ]");

        assert_eq!(inner.inner_remove(1), Removed::Underflow);
        assert_eq!(inner.to_string(), "[ node2 c node4 ]");

        assert_eq!(inner.inner_remove(1), Removed::Underflow);
        assert_eq!(inner.to_string(), "[ node2 ]");

        // Removing the last key leaves just one sub-tree: `Empty`.
        assert_eq!(inner.inner_remove(0), Removed::Empty);

        // Splitting with a high hint gives the same even split.
        inner = saved;
        let sp = inner.split(6);
        assert_eq!(sp.lhs_entries, 4);
        assert_eq!(sp.rhs_entries, 4);
        assert_eq!(sp.crit_key, 'd');
        assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
        assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]");
    }

    // Exercise insertion, splitting, and removal on a leaf node.
    #[test]
    fn leaf() {
        let mut leaf = NodeData::<TF>::leaf('d', SetValue());
        assert_eq!(leaf.to_string(), "[ d ]");

        assert!(leaf.try_leaf_insert(0, 'a', SetValue()));
        assert_eq!(leaf.to_string(), "[ a d ]");
        assert!(leaf.try_leaf_insert(1, 'b', SetValue()));
        assert!(leaf.try_leaf_insert(2, 'c', SetValue()));
        assert_eq!(leaf.to_string(), "[ a b c d ]");
        // Fill the leaf up to its 15-key capacity.
        for i in 4..15 {
            assert!(leaf.try_leaf_insert(
                usize::from(i),
                ('a' as u8 + i) as char,
                SetValue(),
            ));
        }
        assert_eq!(leaf.to_string(), "[ a b c d e f g h i j k l m n o ]");

        // Now the node is full and insertion should fail anywhere.
        assert!(!leaf.try_leaf_insert(0, 'x', SetValue()));
        assert!(!leaf.try_leaf_insert(8, 'x', SetValue()));
        assert!(!leaf.try_leaf_insert(15, 'x', SetValue()));

        // The index given to `split` is not the split position, it's a hint for balancing the node.
        let saved = leaf.clone();
        let sp = leaf.split(12);
        assert_eq!(sp.lhs_entries, 8);
        assert_eq!(sp.rhs_entries, 7);
        assert_eq!(sp.crit_key, 'i');
        assert_eq!(leaf.to_string(), "[ a b c d e f g h ]");
        assert_eq!(sp.rhs_data.to_string(), "[ i j k l m n o ]");

        assert!(leaf.try_leaf_insert(8, 'i', SetValue()));
        assert_eq!(leaf.leaf_remove(2), Removed::Healthy);
        assert_eq!(leaf.to_string(), "[ a b d e f g h i ]");
        assert_eq!(leaf.leaf_remove(7), Removed::Underflow);
        assert_eq!(leaf.to_string(), "[ a b d e f g h ]");

        // A low hint splits the other way around.
        leaf = saved;
        let sp = leaf.split(7);
        assert_eq!(sp.lhs_entries, 7);
        assert_eq!(sp.rhs_entries, 8);
        assert_eq!(sp.crit_key, 'h');
        assert_eq!(leaf.to_string(), "[ a b c d e f g ]");
        assert_eq!(sp.rhs_data.to_string(), "[ h i j k l m n o ]");
    }

    // Check how the `split_pos` heuristic balances around the insertion hint.
    #[test]
    fn optimal_split_pos() {
        // An even split is easy.
        assert_eq!(split_pos(8, 0), 4);
        assert_eq!(split_pos(8, 8), 4);

        // Easy cases for odd splits.
        assert_eq!(split_pos(7, 0), 3);
        assert_eq!(split_pos(7, 7), 4);

        // If the insertion point is the same as the split position, we
        // will append to the lhs node.
        assert_eq!(split_pos(7, 3), 3);
        assert_eq!(split_pos(7, 4), 4);
    }

    // Balance an underflowed inner node against its right sibling.
    #[test]
    fn inner_balance() {
        let n1 = Node(1);
        let n2 = Node(2);
        let n3 = Node(3);
        let mut lhs = NodeData::<TF>::inner(n1, 'a', n2);
        assert!(lhs.try_inner_insert(1, 'b', n3));
        assert_eq!(lhs.to_string(), "[ node1 a node2 b node3 ]");

        let n11 = Node(11);
        let n12 = Node(12);
        let mut rhs = NodeData::<TF>::inner(n11, 'p', n12);

        for i in 1..4 {
            assert!(rhs.try_inner_insert(
                usize::from(i),
                ('p' as u8 + i) as char,
                Node(i as u32 + 12),
            ));
        }
        assert_eq!(
            rhs.to_string(),
            "[ node11 p node12 q node13 r node14 s node15 ]"
        );

        // 3+5 elements fit in RHS.
        assert_eq!(lhs.balance('o', &mut rhs), None);
        assert_eq!(
            rhs.to_string(),
            "[ node1 a node2 b node3 o node11 p node12 q node13 r node14 s node15 ]"
        );

        // 2+8 elements are redistributed.
        lhs = NodeData::<TF>::inner(Node(20), 'x', Node(21));
        assert_eq!(lhs.balance('y', &mut rhs), Some('o'));
        assert_eq!(
            lhs.to_string(),
            "[ node20 x node21 y node1 a node2 b node3 ]"
        );
        assert_eq!(
            rhs.to_string(),
            "[ node11 p node12 q node13 r node14 s node15 ]"
        );
    }

    // Balance an underflowed leaf node against its right sibling.
    #[test]
    fn leaf_balance() {
        let mut lhs = NodeData::<TF>::leaf('a', SetValue());
        for i in 1..6 {
            assert!(lhs.try_leaf_insert(
                usize::from(i),
                ('a' as u8 + i) as char,
                SetValue(),
            ));
        }
        assert_eq!(lhs.to_string(), "[ a b c d e f ]");

        let mut rhs = NodeData::<TF>::leaf('0', SetValue());
        for i in 1..8 {
            assert!(rhs.try_leaf_insert(
                usize::from(i),
                ('0' as u8 + i) as char,
                SetValue(),
            ));
        }
        assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");

        // 6+8 elements all fits in rhs.
        assert_eq!(lhs.balance('0', &mut rhs), None);
        assert_eq!(rhs.to_string(), "[ a b c d e f 0 1 2 3 4 5 6 7 ]");

        assert!(lhs.try_leaf_insert(0, 'x', SetValue()));
        assert!(lhs.try_leaf_insert(1, 'y', SetValue()));
        assert!(lhs.try_leaf_insert(2, 'z', SetValue()));
        assert_eq!(lhs.to_string(), "[ x y z ]");

        // 3+14 elements need redistribution.
        assert_eq!(lhs.balance('a', &mut rhs), Some('0'));
        assert_eq!(lhs.to_string(), "[ x y z a b c d e f ]");
        assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");
    }
}
|
||||
832
lib/codegen/src/bforest/path.rs
Normal file
832
lib/codegen/src/bforest/path.rs
Normal file
@@ -0,0 +1,832 @@
|
||||
//! A path from the root of a B+-tree to a leaf node.
|
||||
|
||||
use super::node::Removed;
|
||||
use super::{slice_insert, slice_shift, Comparator, Forest, Node, NodeData, NodePool, MAX_PATH};
|
||||
use std::borrow::Borrow;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::fmt;
|
||||
|
||||
/// A path from the root of a B+-tree down to an entry in a leaf node.
///
/// `size == 0` is used throughout as the "off-the-end" position. When `size > 0`,
/// `node[size - 1]` is the leaf and `entry[size - 1]` the entry within it.
pub(super) struct Path<F: Forest> {
    /// Number of path entries including the root and leaf nodes.
    size: usize,

    /// Path of node references from the root to a leaf node.
    node: [Node; MAX_PATH],

    /// Entry number in each node.
    entry: [u8; MAX_PATH],

    // Ties the path to a specific `Forest` without storing any `F` data.
    unused: PhantomData<F>,
}
|
||||
|
||||
impl<F: Forest> Default for Path<F> {
    /// An empty path in the "off-the-end" position (`size == 0`).
    fn default() -> Path<F> {
        Path {
            size: 0,
            node: [Node(0); MAX_PATH],
            entry: [0; MAX_PATH],
            unused: PhantomData,
        }
    }
}
|
||||
|
||||
impl<F: Forest> Path<F> {
|
||||
/// Reset path by searching for `key` starting from `root`.
|
||||
///
|
||||
/// If `key` is in the tree, returns the corresponding value and leaved the path pointing at
|
||||
/// the entry. Otherwise returns `None` and:
|
||||
///
|
||||
/// - A key smaller than all stored keys returns a path to the first entry of the first leaf.
|
||||
/// - A key larger than all stored keys returns a path to one beyond the last element of the
|
||||
/// last leaf.
|
||||
/// - A key between the stored keys of adjacent leaf nodes returns a path to one beyond the
|
||||
/// last entry of the first of the leaf nodes.
|
||||
///
|
||||
pub fn find(
|
||||
&mut self,
|
||||
key: F::Key,
|
||||
root: Node,
|
||||
pool: &NodePool<F>,
|
||||
comp: &F::Comparator,
|
||||
) -> Option<F::Value> {
|
||||
let mut node = root;
|
||||
for level in 0.. {
|
||||
self.size = level + 1;
|
||||
self.node[level] = node;
|
||||
match pool[node] {
|
||||
NodeData::Inner { size, keys, tree } => {
|
||||
// Invariant: `tree[i]` contains keys smaller than
|
||||
// `keys[i]`, greater or equal to `keys[i-1]`.
|
||||
let i = match comp.search(key, &keys[0..size.into()]) {
|
||||
// We hit an existing key, so follow the >= branch.
|
||||
Ok(i) => i + 1,
|
||||
// Key is less than `keys[i]`, so follow the < branch.
|
||||
Err(i) => i,
|
||||
};
|
||||
self.entry[level] = i as u8;
|
||||
node = tree[i];
|
||||
}
|
||||
NodeData::Leaf { size, keys, vals } => {
|
||||
// For a leaf we want either the found key or an insert position.
|
||||
return match comp.search(key, &keys.borrow()[0..size.into()]) {
|
||||
Ok(i) => {
|
||||
self.entry[level] = i as u8;
|
||||
Some(vals.borrow()[i])
|
||||
}
|
||||
Err(i) => {
|
||||
self.entry[level] = i as u8;
|
||||
None
|
||||
}
|
||||
};
|
||||
}
|
||||
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
|
||||
}
|
||||
}
|
||||
unreachable!();
|
||||
}
|
||||
|
||||
/// Move path to the first entry of the tree starting at `root` and return it.
|
||||
pub fn first(&mut self, root: Node, pool: &NodePool<F>) -> (F::Key, F::Value) {
|
||||
let mut node = root;
|
||||
for level in 0.. {
|
||||
self.size = level + 1;
|
||||
self.node[level] = node;
|
||||
self.entry[level] = 0;
|
||||
match pool[node] {
|
||||
NodeData::Inner { tree, .. } => node = tree[0],
|
||||
NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]),
|
||||
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
|
||||
}
|
||||
}
|
||||
unreachable!();
|
||||
}
|
||||
|
||||
/// Move this path to the next key-value pair and return it.
|
||||
pub fn next(&mut self, pool: &NodePool<F>) -> Option<(F::Key, F::Value)> {
|
||||
match self.leaf_pos() {
|
||||
None => return None,
|
||||
Some((node, entry)) => {
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
if entry + 1 < keys.len() {
|
||||
self.entry[self.size - 1] += 1;
|
||||
return Some((keys[entry + 1], vals[entry + 1]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The current leaf node is exhausted. Move to the next one.
|
||||
let leaf_level = self.size - 1;
|
||||
self.next_node(leaf_level, pool).map(|node| {
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
(keys[0], vals[0])
|
||||
})
|
||||
}
|
||||
|
||||
/// Move this path to the previous key-value pair and return it.
|
||||
///
|
||||
/// If the path is at the off-the-end position, go to the last key-value pair.
|
||||
///
|
||||
/// If the path is already at the first key-value pair, leave it there and return `None`.
|
||||
pub fn prev(&mut self, root: Node, pool: &NodePool<F>) -> Option<(F::Key, F::Value)> {
|
||||
// We use `size == 0` as a generic off-the-end position.
|
||||
if self.size == 0 {
|
||||
self.goto_subtree_last(0, root, pool);
|
||||
let (node, entry) = self.leaf_pos().unwrap();
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
return Some((keys[entry], vals[entry]));
|
||||
}
|
||||
|
||||
match self.leaf_pos() {
|
||||
None => return None,
|
||||
Some((node, entry)) => {
|
||||
if entry > 0 {
|
||||
self.entry[self.size - 1] -= 1;
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
return Some((keys[entry - 1], vals[entry - 1]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The current leaf node is exhausted. Move to the previous one.
|
||||
self.prev_leaf(pool).map(|node| {
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
let e = self.leaf_entry();
|
||||
(keys[e], vals[e])
|
||||
})
|
||||
}
|
||||
|
||||
/// Move path to the first entry of the next node at level, if one exists.
|
||||
///
|
||||
/// Returns the new node if it exists.
|
||||
///
|
||||
/// Reset the path to `size = 0` and return `None` if there is no next node.
|
||||
fn next_node(&mut self, level: usize, pool: &NodePool<F>) -> Option<Node> {
|
||||
match self.right_sibling_branch_level(level, pool) {
|
||||
None => {
|
||||
self.size = 0;
|
||||
None
|
||||
}
|
||||
Some(bl) => {
|
||||
let (_, bnodes) = pool[self.node[bl]].unwrap_inner();
|
||||
self.entry[bl] += 1;
|
||||
let mut node = bnodes[usize::from(self.entry[bl])];
|
||||
|
||||
for l in bl + 1..level {
|
||||
self.node[l] = node;
|
||||
self.entry[l] = 0;
|
||||
node = pool[node].unwrap_inner().1[0];
|
||||
}
|
||||
|
||||
self.node[level] = node;
|
||||
self.entry[level] = 0;
|
||||
Some(node)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Move the path to the last entry of the previous leaf node, if one exists.
|
||||
///
|
||||
/// Returns the new leaf node if it exists.
|
||||
///
|
||||
/// Leave the path unchanged and returns `None` if we are already at the first leaf node.
|
||||
fn prev_leaf(&mut self, pool: &NodePool<F>) -> Option<Node> {
|
||||
self.left_sibling_branch_level(self.size - 1).map(|bl| {
|
||||
let entry = self.entry[bl] - 1;
|
||||
self.entry[bl] = entry;
|
||||
let (_, bnodes) = pool[self.node[bl]].unwrap_inner();
|
||||
self.goto_subtree_last(bl + 1, bnodes[usize::from(entry)], pool)
|
||||
})
|
||||
}
|
||||
|
||||
/// Move this path to the last position for the sub-tree at `level, root`.
|
||||
fn goto_subtree_last(&mut self, level: usize, root: Node, pool: &NodePool<F>) -> Node {
|
||||
let mut node = root;
|
||||
for l in level.. {
|
||||
self.node[l] = node;
|
||||
match pool[node] {
|
||||
NodeData::Inner { size, ref tree, .. } => {
|
||||
self.entry[l] = size;
|
||||
node = tree[usize::from(size)];
|
||||
}
|
||||
NodeData::Leaf { size, .. } => {
|
||||
self.entry[l] = size - 1;
|
||||
self.size = l + 1;
|
||||
break;
|
||||
}
|
||||
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
|
||||
}
|
||||
}
|
||||
node
|
||||
}
|
||||
|
||||
/// Set the root node and point the path at the first entry of the node.
|
||||
pub fn set_root_node(&mut self, root: Node) {
|
||||
self.size = 1;
|
||||
self.node[0] = root;
|
||||
self.entry[0] = 0;
|
||||
}
|
||||
|
||||
/// Get the current leaf node and entry, if any.
|
||||
pub fn leaf_pos(&self) -> Option<(Node, usize)> {
|
||||
let i = self.size.wrapping_sub(1);
|
||||
self.node.get(i).map(|&n| (n, self.entry[i].into()))
|
||||
}
|
||||
|
||||
/// Get the current leaf node.
|
||||
fn leaf_node(&self) -> Node {
|
||||
self.node[self.size - 1]
|
||||
}
|
||||
|
||||
/// Get the current entry in the leaf node.
|
||||
fn leaf_entry(&self) -> usize {
|
||||
self.entry[self.size - 1].into()
|
||||
}
|
||||
|
||||
/// Is this path pointing to the first entry in the tree?
|
||||
/// This corresponds to the smallest key.
|
||||
fn at_first_entry(&self) -> bool {
|
||||
self.entry[0..self.size].iter().all(|&i| i == 0)
|
||||
}
|
||||
|
||||
/// Get a mutable reference to the current value.
|
||||
/// This assumes that there is a current value.
|
||||
pub fn value_mut<'a>(&self, pool: &'a mut NodePool<F>) -> &'a mut F::Value {
|
||||
&mut pool[self.leaf_node()].unwrap_leaf_mut().1[self.leaf_entry()]
|
||||
}
|
||||
|
||||
/// Insert the key-value pair at the current position.
|
||||
/// The current position must be the correct insertion location for the key.
|
||||
/// This function does not check for duplicate keys. Use `find` or similar for that.
|
||||
/// Returns the new root node.
|
||||
pub fn insert(&mut self, key: F::Key, value: F::Value, pool: &mut NodePool<F>) -> Node {
|
||||
if !self.try_leaf_insert(key, value, pool) {
|
||||
self.split_and_insert(key, value, pool);
|
||||
}
|
||||
self.node[0]
|
||||
}
|
||||
|
||||
/// Try to insert `key, value` at the current position, but fail and return false if the leaf
|
||||
/// node is full.
|
||||
fn try_leaf_insert(&self, key: F::Key, value: F::Value, pool: &mut NodePool<F>) -> bool {
|
||||
let index = self.leaf_entry();
|
||||
|
||||
// The case `index == 0` should only ever happen when there are no earlier leaf nodes,
|
||||
// otherwise we should have appended to the previous leaf node instead. This invariant
|
||||
// means that we don't need to update keys stored in inner nodes here.
|
||||
debug_assert!(index > 0 || self.at_first_entry());
|
||||
|
||||
pool[self.leaf_node()].try_leaf_insert(index, key, value)
|
||||
}
|
||||
|
||||
/// Split the current leaf node and then insert `key, value`.
|
||||
/// This should only be used if `try_leaf_insert()` fails.
|
||||
fn split_and_insert(&mut self, mut key: F::Key, value: F::Value, pool: &mut NodePool<F>) {
|
||||
let orig_root = self.node[0];
|
||||
|
||||
// Loop invariant: We need to split the node at `level` and then retry a failed insertion.
|
||||
// The items to insert are either `(key, ins_node)` or `(key, value)`.
|
||||
let mut ins_node = None;
|
||||
let mut split;
|
||||
for level in (0..self.size).rev() {
|
||||
// Split the current node.
|
||||
let mut node = self.node[level];
|
||||
let mut entry = self.entry[level].into();
|
||||
split = pool[node].split(entry);
|
||||
let rhs_node = pool.alloc_node(split.rhs_data);
|
||||
|
||||
// Should the path be moved to the new RHS node?
|
||||
// Prefer the smaller node if we're right in the middle.
|
||||
// Prefer to append to LHS all other things being equal.
|
||||
//
|
||||
// When inserting into an inner node (`ins_node.is_some()`), we must point to a valid
|
||||
// entry in the current node since the new entry is inserted *after* the insert
|
||||
// location.
|
||||
if entry > split.lhs_entries ||
|
||||
(entry == split.lhs_entries &&
|
||||
(split.lhs_entries > split.rhs_entries || ins_node.is_some()))
|
||||
{
|
||||
node = rhs_node;
|
||||
entry -= split.lhs_entries;
|
||||
self.node[level] = node;
|
||||
self.entry[level] = entry as u8;
|
||||
}
|
||||
|
||||
// Now that we have a not-full node, it must be possible to insert.
|
||||
match ins_node {
|
||||
None => {
|
||||
let inserted = pool[node].try_leaf_insert(entry, key, value);
|
||||
debug_assert!(inserted);
|
||||
// If we inserted at the front of the new rhs_node leaf, we need to propagate
|
||||
// the inserted key as the critical key instead of the previous front key.
|
||||
if entry == 0 && node == rhs_node {
|
||||
split.crit_key = key;
|
||||
}
|
||||
}
|
||||
Some(n) => {
|
||||
let inserted = pool[node].try_inner_insert(entry, key, n);
|
||||
debug_assert!(inserted);
|
||||
// The lower level was moved to the new RHS node, so make sure that is
|
||||
// reflected here.
|
||||
if n == self.node[level + 1] {
|
||||
self.entry[level] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We are now done with the current level, but `rhs_node` must be inserted in the inner
|
||||
// node above us. If we're already at level 0, the root node needs to be split.
|
||||
key = split.crit_key;
|
||||
ins_node = Some(rhs_node);
|
||||
if level > 0 {
|
||||
let pnode = &mut pool[self.node[level - 1]];
|
||||
let pentry = self.entry[level - 1].into();
|
||||
if pnode.try_inner_insert(pentry, key, rhs_node) {
|
||||
// If this level level was moved to the new RHS node, update parent entry.
|
||||
if node == rhs_node {
|
||||
self.entry[level - 1] += 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we get here we have split the original root node and need to add an extra level.
|
||||
let rhs_node = ins_node.expect("empty path");
|
||||
let root = pool.alloc_node(NodeData::inner(orig_root, key, rhs_node));
|
||||
let entry = if self.node[0] == rhs_node { 1 } else { 0 };
|
||||
self.size += 1;
|
||||
slice_insert(&mut self.node[0..self.size], 0, root);
|
||||
slice_insert(&mut self.entry[0..self.size], 0, entry);
|
||||
}
|
||||
|
||||
/// Remove the key-value pair at the current position and advance the path to the next
|
||||
/// key-value pair, leaving the path in a normalized state.
|
||||
///
|
||||
/// Return the new root node.
|
||||
pub fn remove(&mut self, pool: &mut NodePool<F>) -> Option<Node> {
|
||||
let e = self.leaf_entry();
|
||||
match pool[self.leaf_node()].leaf_remove(e) {
|
||||
Removed::Healthy => {
|
||||
if e == 0 {
|
||||
self.update_crit_key(pool)
|
||||
}
|
||||
Some(self.node[0])
|
||||
}
|
||||
status => self.balance_nodes(status, pool),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the critical key for the current node at `level`.
|
||||
///
|
||||
/// The critical key is less than or equal to all keys in the sub-tree at `level` and greater
|
||||
/// than all keys to the left of the current node at `level`.
|
||||
///
|
||||
/// The left-most node at any level does not have a critical key.
|
||||
fn current_crit_key(&self, level: usize, pool: &NodePool<F>) -> Option<F::Key> {
|
||||
// Find the level containing the critical key for the current node.
|
||||
self.left_sibling_branch_level(level).map(|bl| {
|
||||
let (keys, _) = pool[self.node[bl]].unwrap_inner();
|
||||
keys[usize::from(self.entry[bl]) - 1]
|
||||
})
|
||||
}
|
||||
|
||||
/// Update the critical key after removing the front entry of the leaf node.
|
||||
fn update_crit_key(&mut self, pool: &mut NodePool<F>) {
|
||||
// Find the inner level containing the critical key for the current leaf node.
|
||||
let crit_level = match self.left_sibling_branch_level(self.size - 1) {
|
||||
None => return,
|
||||
Some(l) => l,
|
||||
};
|
||||
let crit_kidx = self.entry[crit_level] - 1;
|
||||
|
||||
// Extract the new critical key from the leaf node.
|
||||
let crit_key = pool[self.leaf_node()].leaf_crit_key();
|
||||
let crit_node = self.node[crit_level];
|
||||
|
||||
match pool[crit_node] {
|
||||
NodeData::Inner { size, ref mut keys, .. } => {
|
||||
debug_assert!(crit_kidx < size);
|
||||
keys[usize::from(crit_kidx)] = crit_key;
|
||||
}
|
||||
_ => panic!("Expected inner node"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Given that the current leaf node is in an unhealthy (underflowed or even empty) status,
|
||||
/// balance it with sibling nodes.
|
||||
///
|
||||
/// Return the new root node.
|
||||
fn balance_nodes(&mut self, status: Removed, pool: &mut NodePool<F>) -> Option<Node> {
|
||||
// The current leaf node is not in a healthy state, and its critical key may have changed
|
||||
// too.
|
||||
//
|
||||
// Start by dealing with a changed critical key for the leaf level.
|
||||
if status != Removed::Empty && self.leaf_entry() == 0 {
|
||||
self.update_crit_key(pool);
|
||||
}
|
||||
|
||||
let leaf_level = self.size - 1;
|
||||
if self.heal_level(status, leaf_level, pool) {
|
||||
// Tree has become empty.
|
||||
self.size = 0;
|
||||
return None;
|
||||
}
|
||||
|
||||
// Discard the root node if it has shrunk to a single sub-tree.
|
||||
let mut ns = 0;
|
||||
while let NodeData::Inner { size: 0, ref tree, .. } = pool[self.node[ns]] {
|
||||
ns += 1;
|
||||
self.node[ns] = tree[0];
|
||||
}
|
||||
|
||||
if ns > 0 {
|
||||
for l in 0..ns {
|
||||
pool.free_node(self.node[l]);
|
||||
}
|
||||
|
||||
// Shift the whole array instead of just 0..size because `self.size` may be cleared
|
||||
// here if the path is pointing off-the-end.
|
||||
slice_shift(&mut self.node, ns);
|
||||
slice_shift(&mut self.entry, ns);
|
||||
|
||||
if self.size > 0 {
|
||||
self.size -= ns;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the root node, even when `size=0` indicating that we're at the off-the-end
|
||||
// position.
|
||||
Some(self.node[0])
|
||||
}
|
||||
|
||||
/// After removing an entry from the node at `level`, check its health and rebalance as needed.
|
||||
///
|
||||
/// Leave the path up to and including `level` in a normalized state where all entries are in
|
||||
/// bounds.
|
||||
///
|
||||
/// Returns true if the tree becomes empty.
|
||||
fn heal_level(&mut self, status: Removed, level: usize, pool: &mut NodePool<F>) -> bool {
|
||||
match status {
|
||||
Removed::Healthy => {}
|
||||
Removed::Rightmost => {
|
||||
// The rightmost entry was removed from the curent node, so move the path so it
|
||||
// points at the first entry of the next node at this level.
|
||||
debug_assert_eq!(
|
||||
usize::from(self.entry[level]),
|
||||
pool[self.node[level]].entries()
|
||||
);
|
||||
self.next_node(level, pool);
|
||||
}
|
||||
Removed::Underflow => self.underflowed_node(level, pool),
|
||||
Removed::Empty => return self.empty_node(level, pool),
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// The current node at `level` has underflowed, meaning that it is below half capacity but
|
||||
/// not completely empty.
|
||||
///
|
||||
/// Handle this by balancing entries with the right sibling node.
|
||||
///
|
||||
/// Leave the path up to and including `level` in a valid state that points to the same entry.
|
||||
fn underflowed_node(&mut self, level: usize, pool: &mut NodePool<F>) {
|
||||
// Look for a right sibling node at this level. If none exists, we allow the underflowed
|
||||
// node to persist as the right-most node at its level.
|
||||
if let Some((crit_key, rhs_node)) = self.right_sibling(level, pool) {
|
||||
// New critical key for the updated right sibling node.
|
||||
let new_ck: Option<F::Key>;
|
||||
let empty;
|
||||
// Make a COPY of the sibling node to avoid fighting the borrow checker.
|
||||
let mut rhs = pool[rhs_node];
|
||||
match pool[self.node[level]].balance(crit_key, &mut rhs) {
|
||||
None => {
|
||||
// Everything got moved to the RHS node.
|
||||
new_ck = self.current_crit_key(level, pool);
|
||||
empty = true;
|
||||
}
|
||||
Some(key) => {
|
||||
// Entries moved from RHS node.
|
||||
new_ck = Some(key);
|
||||
empty = false;
|
||||
}
|
||||
}
|
||||
// Put back the updated RHS node data.
|
||||
pool[rhs_node] = rhs;
|
||||
// Update the critical key for the RHS node unless it has become a left-most
|
||||
// node.
|
||||
if let Some(ck) = new_ck {
|
||||
self.update_right_crit_key(level, ck, pool);
|
||||
}
|
||||
if empty {
|
||||
let empty_tree = self.empty_node(level, pool);
|
||||
debug_assert!(!empty_tree);
|
||||
}
|
||||
|
||||
// Any Removed::Rightmost state must have been cleared above by merging nodes. If the
|
||||
// current entry[level] was one off the end of the node, it will now point at a proper
|
||||
// entry.
|
||||
debug_assert!(usize::from(self.entry[level]) < pool[self.node[level]].entries());
|
||||
} else if usize::from(self.entry[level]) >= pool[self.node[level]].entries() {
|
||||
// There's no right sibling at this level, so the node can't be rebalanced.
|
||||
// Check if we are in an off-the-end position.
|
||||
self.size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// The current node at `level` has become empty.
|
||||
///
|
||||
/// Remove the node from its parent node and leave the path in a normalized state. This means
|
||||
/// that the path at this level will go through the right sibling of this node.
|
||||
///
|
||||
/// If the current node has no right sibling, set `self.size = 0`.
|
||||
///
|
||||
/// Returns true if the tree becomes empty.
|
||||
fn empty_node(&mut self, level: usize, pool: &mut NodePool<F>) -> bool {
|
||||
pool.free_node(self.node[level]);
|
||||
if level == 0 {
|
||||
// We just deleted the root node, so the tree is now empty.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get the right sibling node before recursively removing nodes.
|
||||
let rhs_node = self.right_sibling(level, pool).map(|(_, n)| n);
|
||||
|
||||
// Remove the current sub-tree from the parent node.
|
||||
let pl = level - 1;
|
||||
let pe = self.entry[pl].into();
|
||||
let status = pool[self.node[pl]].inner_remove(pe);
|
||||
self.heal_level(status, pl, pool);
|
||||
|
||||
// Finally update the path at this level.
|
||||
match rhs_node {
|
||||
// We'll leave `self.entry[level]` unchanged. It can be non-zero after moving node
|
||||
// entries to the right sibling node.
|
||||
Some(rhs) => self.node[level] = rhs,
|
||||
// We have no right sibling, so we must have deleted the right-most
|
||||
// entry. The path should be moved to the "off-the-end" position.
|
||||
None => self.size = 0,
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Find the level where the right sibling to the current node at `level` branches off.
|
||||
///
|
||||
/// This will be an inner node with two adjacent sub-trees: In one the current node at level is
|
||||
/// a right-most node, in the other, the right sibling is a left-most node.
|
||||
///
|
||||
/// Returns `None` if the current node is a right-most node so no right sibling exists.
|
||||
fn right_sibling_branch_level(&self, level: usize, pool: &NodePool<F>) -> Option<usize> {
|
||||
(0..level).rposition(|l| match pool[self.node[l]] {
|
||||
NodeData::Inner { size, .. } => self.entry[l] < size,
|
||||
_ => panic!("Expected inner node"),
|
||||
})
|
||||
}
|
||||
|
||||
/// Find the level where the left sibling to the current node at `level` branches off.
|
||||
fn left_sibling_branch_level(&self, level: usize) -> Option<usize> {
|
||||
self.entry[0..level].iter().rposition(|&e| e != 0)
|
||||
}
|
||||
|
||||
/// Get the right sibling node to the current node at `level`.
|
||||
/// Also return the critical key between the current node and the right sibling.
|
||||
fn right_sibling(&self, level: usize, pool: &NodePool<F>) -> Option<(F::Key, Node)> {
|
||||
// Find the critical level: The deepest level where two sibling subtrees contain the
|
||||
// current node and its right sibling.
|
||||
self.right_sibling_branch_level(level, pool).map(|bl| {
|
||||
// Extract the critical key and the `bl+1` node.
|
||||
let be = usize::from(self.entry[bl]);
|
||||
let crit_key;
|
||||
let mut node;
|
||||
{
|
||||
let (keys, tree) = pool[self.node[bl]].unwrap_inner();
|
||||
crit_key = keys[be];
|
||||
node = tree[be + 1];
|
||||
}
|
||||
|
||||
// Follow left-most links back down to `level`.
|
||||
for _ in bl + 1..level {
|
||||
node = pool[node].unwrap_inner().1[0];
|
||||
}
|
||||
|
||||
(crit_key, node)
|
||||
})
|
||||
}
|
||||
|
||||
    /// Update the critical key for the right sibling node at `level`.
    ///
    /// The critical key lives in the inner node where the two sibling sub-trees branch off,
    /// which is not necessarily the immediate parent.
    ///
    /// Panics if no right sibling exists.
    fn update_right_crit_key(&self, level: usize, crit_key: F::Key, pool: &mut NodePool<F>) {
        let bl = self.right_sibling_branch_level(level, pool).expect(
            "No right sibling exists",
        );
        match pool[self.node[bl]] {
            NodeData::Inner { ref mut keys, .. } => {
                // `entry[bl]` indexes the key separating our sub-tree from the sibling's.
                keys[usize::from(self.entry[bl])] = crit_key;
            }
            _ => panic!("Expected inner node"),
        }
    }
|
||||
|
||||
    /// Normalize the path position such that it is either pointing at a real entry or `size=0`
    /// indicating "off-the-end".
    pub fn normalize(&mut self, pool: &mut NodePool<F>) {
        if let Some((leaf, entry)) = self.leaf_pos() {
            if entry >= pool[leaf].entries() {
                // The leaf entry is one-past-the-end; advance to the first entry of the next
                // leaf, or collapse to the off-the-end position if this was the last leaf.
                let leaf_level = self.size - 1;
                self.next_node(leaf_level, pool);
            }
        }
    }
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl<F: Forest> Path<F> {
    /// Check the internal consistency of this path.
    ///
    /// Verifies that every level holds a node of the expected kind (inner nodes above, a leaf
    /// at the bottom), that entries are in bounds, and that each node is the child selected by
    /// its parent's entry. Panics on any inconsistency.
    pub fn verify(&self, pool: &NodePool<F>) {
        for level in 0..self.size {
            match pool[self.node[level]] {
                NodeData::Inner { size, tree, .. } => {
                    // Inner nodes may only appear above the bottom level.
                    assert!(
                        level < self.size - 1,
                        "Expected leaf node at level {}",
                        level
                    );
                    assert!(
                        self.entry[level] <= size,
                        "OOB inner entry {}/{} at level {}",
                        self.entry[level],
                        size,
                        level
                    );
                    // The node recorded at the next level must be the sub-tree selected
                    // by this level's entry.
                    assert_eq!(
                        self.node[level + 1],
                        tree[usize::from(self.entry[level])],
                        "Node mismatch at level {}",
                        level
                    );
                }
                NodeData::Leaf { size, .. } => {
                    // A leaf is only valid at the bottom level of the path.
                    assert_eq!(level, self.size - 1, "Expected inner node");
                    assert!(
                        self.entry[level] <= size,
                        "OOB leaf entry {}/{}",
                        self.entry[level],
                        size,
                    );
                }
                NodeData::Free { .. } => {
                    panic!("Free {} in path", self.node[level]);
                }
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
impl<F: Forest> fmt::Display for Path<F> {
    /// Render the path as `node[entry]--node[entry]--…`, or `<empty path>` when unset.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.size == 0 {
            return write!(f, "<empty path>");
        }
        for i in 0..self.size {
            // The first component has no separator; later ones are joined with `--`.
            let sep = if i == 0 { "" } else { "--" };
            write!(f, "{}{}[{}]", sep, self.node[i], self.entry[i])?;
        }
        Ok(())
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::super::{Forest, NodeData, NodePool};
    use super::*;
    use std::cmp::Ordering;

    // Trivial comparator over `i32` used by the test forest.
    struct TC();

    impl Comparator<i32> for TC {
        fn cmp(&self, a: i32, b: i32) -> Ordering {
            a.cmp(&b)
        }
    }

    // Small test forest: 7-entry leaves keep trees shallow and easy to inspect.
    struct TF();

    impl Forest for TF {
        type Key = i32;
        type Value = char;
        type LeafKeys = [i32; 7];
        type LeafValues = [char; 7];
        type Comparator = TC;

        fn splat_key(key: Self::Key) -> Self::LeafKeys {
            [key; 7]
        }

        fn splat_value(value: Self::Value) -> Self::LeafValues {
            [value; 7]
        }
    }

    #[test]
    fn search_single_leaf() {
        // Testing Path::new() for trees with a single leaf node.
        let mut pool = NodePool::<TF>::new();
        let root = pool.alloc_node(NodeData::leaf(10, 'a'));
        let mut p = Path::default();
        let comp = TC();

        // Search for key less than stored key.
        assert_eq!(p.find(5, root, &pool, &comp), None);
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);

        // Search for stored key.
        assert_eq!(p.find(10, root, &pool, &comp), Some('a'));
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);

        // Search for key greater than stored key.
        assert_eq!(p.find(15, root, &pool, &comp), None);
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 1);

        // Modify leaf node to contain two values.
        match pool[root] {
            NodeData::Leaf {
                ref mut size,
                ref mut keys,
                ref mut vals,
            } => {
                *size = 2;
                keys[1] = 20;
                vals[1] = 'b';
            }
            _ => unreachable!(),
        }

        // Search for key between stored keys.
        assert_eq!(p.find(15, root, &pool, &comp), None);
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 1);

        // Search for key greater than stored keys.
        assert_eq!(p.find(25, root, &pool, &comp), None);
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 2);
    }

    #[test]
    fn search_single_inner() {
        // Testing Path::new() for trees with a single inner node and two leaves.
        let mut pool = NodePool::<TF>::new();
        let leaf1 = pool.alloc_node(NodeData::leaf(10, 'a'));
        let leaf2 = pool.alloc_node(NodeData::leaf(20, 'b'));
        let root = pool.alloc_node(NodeData::inner(leaf1, 20, leaf2));
        let mut p = Path::default();
        let comp = TC();

        // Search for key less than stored keys.
        assert_eq!(p.find(5, root, &pool, &comp), None);
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);
        assert_eq!(p.node[1], leaf1);
        assert_eq!(p.entry[1], 0);

        assert_eq!(p.find(10, root, &pool, &comp), Some('a'));
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);
        assert_eq!(p.node[1], leaf1);
        assert_eq!(p.entry[1], 0);

        // Midway between the two leaf nodes.
        assert_eq!(p.find(15, root, &pool, &comp), None);
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);
        assert_eq!(p.node[1], leaf1);
        assert_eq!(p.entry[1], 1);

        assert_eq!(p.find(20, root, &pool, &comp), Some('b'));
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 1);
        assert_eq!(p.node[1], leaf2);
        assert_eq!(p.entry[1], 0);

        assert_eq!(p.find(25, root, &pool, &comp), None);
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 1);
        assert_eq!(p.node[1], leaf2);
        assert_eq!(p.entry[1], 1);
    }
}
|
||||
213
lib/codegen/src/bforest/pool.rs
Normal file
213
lib/codegen/src/bforest/pool.rs
Normal file
@@ -0,0 +1,213 @@
|
||||
//! B+-tree node pool.
|
||||
|
||||
use super::{Forest, Node, NodeData};
|
||||
use entity::PrimaryMap;
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
/// A pool of nodes, including a free list.
///
/// All nodes of every tree in a forest live in one pool, addressed by `Node` indices.
/// Freed nodes are threaded onto an intrusive singly-linked free list (via `NodeData::Free`)
/// so later allocations can reuse their slots.
pub(super) struct NodePool<F: Forest> {
    // Backing storage for all node data; a `Node` is an index into this map.
    nodes: PrimaryMap<Node, NodeData<F>>,
    // Head of the free list, or `None` when no freed slots are available for reuse.
    freelist: Option<Node>,
}
|
||||
|
||||
impl<F: Forest> NodePool<F> {
|
||||
/// Allocate a new empty pool of nodes.
|
||||
pub fn new() -> NodePool<F> {
|
||||
NodePool {
|
||||
nodes: PrimaryMap::new(),
|
||||
freelist: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Free all nodes.
|
||||
pub fn clear(&mut self) {
|
||||
self.nodes.clear();
|
||||
self.freelist = None;
|
||||
}
|
||||
|
||||
/// Allocate a new node containing `data`.
|
||||
pub fn alloc_node(&mut self, data: NodeData<F>) -> Node {
|
||||
debug_assert!(!data.is_free(), "can't allocate free node");
|
||||
match self.freelist {
|
||||
Some(node) => {
|
||||
// Remove this node from the free list.
|
||||
match self.nodes[node] {
|
||||
NodeData::Free { next } => self.freelist = next,
|
||||
_ => panic!("Invalid {} on free list", node),
|
||||
}
|
||||
self.nodes[node] = data;
|
||||
node
|
||||
}
|
||||
None => {
|
||||
// The free list is empty. Allocate a new node.
|
||||
self.nodes.push(data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Free a node.
|
||||
pub fn free_node(&mut self, node: Node) {
|
||||
// Quick check for a double free.
|
||||
debug_assert!(!self.nodes[node].is_free(), "{} is already free", node);
|
||||
self.nodes[node] = NodeData::Free { next: self.freelist };
|
||||
self.freelist = Some(node);
|
||||
}
|
||||
|
||||
/// Free the entire tree rooted at `node`.
|
||||
pub fn free_tree(&mut self, node: Node) {
|
||||
if let NodeData::Inner { size, tree, .. } = self[node] {
|
||||
// Note that we have to capture `tree` by value to avoid borrow checker trouble.
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
|
||||
for i in 0..usize::from(size + 1) {
|
||||
// Recursively free sub-trees. This recursion can never be deeper than `MAX_PATH`,
|
||||
// and since most trees have less than a handful of nodes, it is worthwhile to
|
||||
// avoid the heap allocation for an iterative tree traversal.
|
||||
self.free_tree(tree[i]);
|
||||
}
|
||||
}
|
||||
self.free_node(node);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl<F: Forest> NodePool<F> {
    /// Verify the consistency of the tree rooted at `node`.
    ///
    /// Walks the whole tree iteratively with an explicit work list, checking node occupancy,
    /// strict key ordering against per-subtree lower/upper bounds, and that no node appears
    /// twice. Panics on any violation.
    pub fn verify_tree(&self, node: Node, comp: &F::Comparator)
    where
        NodeData<F>: ::std::fmt::Display,
        F::Key: ::std::fmt::Display,
    {
        use super::Comparator;
        use entity::SparseSet;
        use std::borrow::Borrow;
        use std::cmp::Ordering;
        use std::vec::Vec;

        // The root node can't be an inner node with just a single sub-tree. It should have been
        // pruned.
        if let &NodeData::Inner { size, .. } = &self[node] {
            assert!(size > 0, "Root must have more than one sub-tree");
        }

        let mut done = SparseSet::new();
        let mut todo = Vec::new();

        // Todo-list entries are:
        // 1. Optional LHS key which must be <= all node entries.
        // 2. The node reference.
        // 3. Optional RHS key which must be > all node entries.
        todo.push((None, node, None));

        while let Some((lkey, node, rkey)) = todo.pop() {
            // A node reachable twice means the tree shares or cycles structure.
            assert_eq!(
                done.insert(node),
                None,
                "Node appears more than once in tree"
            );
            let mut lower = lkey;

            match self[node] {
                NodeData::Inner { size, keys, tree } => {
                    let size = size as usize;
                    let capacity = tree.len();
                    let keys = &keys[0..size];

                    // Verify occupancy.
                    // Right-most nodes can be small, but others must be at least half full.
                    assert!(
                        rkey.is_none() || (size + 1) * 2 >= capacity,
                        "Only {}/{} entries in {}:{}, upper={}",
                        size + 1,
                        capacity,
                        node,
                        self[node],
                        rkey.unwrap()
                    );

                    // Queue up the sub-trees, checking for duplicates.
                    for i in 0..size + 1 {
                        // Get an upper bound for node[i].
                        let upper = keys.get(i).cloned().or(rkey);

                        // Check that keys are strictly monotonic.
                        if let (Some(a), Some(b)) = (lower, upper) {
                            assert_eq!(
                                comp.cmp(a, b),
                                Ordering::Less,
                                "Key order {} < {} failed in {}: {}",
                                a,
                                b,
                                node,
                                self[node]
                            );
                        }

                        // Queue up the sub-tree.
                        todo.push((lower, tree[i], upper));

                        // Set a lower bound for the next tree.
                        lower = upper;
                    }
                }
                NodeData::Leaf { size, keys, .. } => {
                    let size = size as usize;
                    let capacity = keys.borrow().len();
                    let keys = &keys.borrow()[0..size];

                    // Verify occupancy.
                    // Right-most nodes can be small, but others must be at least half full.
                    assert!(size > 0, "Leaf {} is empty", node);
                    assert!(
                        rkey.is_none() || size * 2 >= capacity,
                        "Only {}/{} entries in {}:{}, upper={}",
                        size,
                        capacity,
                        node,
                        self[node],
                        rkey.unwrap()
                    );

                    for i in 0..size + 1 {
                        let upper = keys.get(i).cloned().or(rkey);

                        // Check that keys are strictly monotonic.
                        if let (Some(a), Some(b)) = (lower, upper) {
                            // The first leaf key must equal the critical key inherited from
                            // the parent; subsequent keys must be strictly increasing.
                            let wanted = if i == 0 {
                                Ordering::Equal
                            } else {
                                Ordering::Less
                            };
                            assert_eq!(
                                comp.cmp(a, b),
                                wanted,
                                "Key order for {} - {} failed in {}: {}",
                                a,
                                b,
                                node,
                                self[node]
                            );
                        }

                        // Set a lower bound for the next key.
                        lower = upper;
                    }
                }
                NodeData::Free { .. } => panic!("Free {} reached", node),
            }
        }
    }
}
|
||||
|
||||
impl<F: Forest> Index<Node> for NodePool<F> {
|
||||
type Output = NodeData<F>;
|
||||
|
||||
fn index(&self, index: Node) -> &Self::Output {
|
||||
self.nodes.index(index)
|
||||
}
|
||||
}
|
||||
|
||||
impl<F: Forest> IndexMut<Node> for NodePool<F> {
    /// Mutable lookup of the data for `index`; panics if the node was never allocated.
    fn index_mut(&mut self, index: Node) -> &mut Self::Output {
        &mut self.nodes[index]
    }
}
|
||||
594
lib/codegen/src/bforest/set.rs
Normal file
594
lib/codegen/src/bforest/set.rs
Normal file
@@ -0,0 +1,594 @@
|
||||
//! Forest of sets.
|
||||
|
||||
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, SetValue, INNER_SIZE};
|
||||
use packed_option::PackedOption;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// Tag type defining forest types for a set.
///
/// Sets store no values, so leaves carry the zero-sized `SetValue` marker alongside keys.
struct SetTypes<K, C>(PhantomData<(K, C)>);

impl<K, C> Forest for SetTypes<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    type Key = K;
    type Value = SetValue;
    type LeafKeys = [K; 2 * INNER_SIZE - 1];
    type LeafValues = [SetValue; 2 * INNER_SIZE - 1];
    type Comparator = C;

    fn splat_key(key: Self::Key) -> Self::LeafKeys {
        [key; 2 * INNER_SIZE - 1]
    }

    fn splat_value(value: Self::Value) -> Self::LeafValues {
        [value; 2 * INNER_SIZE - 1]
    }
}
|
||||
|
||||
/// Memory pool for a forest of `Set` instances.
pub struct SetForest<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    // Shared node pool backing every `Set` in this forest.
    nodes: NodePool<SetTypes<K, C>>,
}

impl<K, C> SetForest<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Create a new empty forest.
    pub fn new() -> SetForest<K, C> {
        SetForest { nodes: NodePool::new() }
    }

    /// Clear all sets in the forest.
    ///
    /// All `Set` instances belonging to this forest are invalidated and should no longer be
    /// used.
    pub fn clear(&mut self) {
        self.nodes.clear();
    }
}
|
||||
|
||||
/// B-tree representing an ordered set of `K`s using `C` for comparing elements.
///
/// This is not a general-purpose replacement for `BTreeSet`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Sets can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a set does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
#[derive(Clone)]
pub struct Set<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    // Root node of this set's tree, or `None` for an empty set.
    root: PackedOption<Node>,
    // Pins the `K`/`C` type parameters without storing them.
    unused: PhantomData<(K, C)>,
}

impl<K, C> Set<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Make an empty set.
    pub fn new() -> Set<K, C> {
        Set {
            root: None.into(),
            unused: PhantomData,
        }
    }

    /// Is this an empty set?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Does the set contain `key`?
    pub fn contains(&self, key: K, forest: &SetForest<K, C>, comp: &C) -> bool {
        self.root
            .expand()
            .and_then(|root| Path::default().find(key, root, &forest.nodes, comp))
            .is_some()
    }

    /// Try to insert `key` into the set.
    ///
    /// If the set did not contain `key`, insert it and return true.
    ///
    /// If `key` is already present, don't change the set and return false.
    pub fn insert(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
        self.cursor(forest, comp).insert(key)
    }

    /// Remove `key` from the set and return true.
    ///
    /// If `key` was not present in the set, return false.
    pub fn remove(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
        let mut c = self.cursor(forest, comp);
        if c.goto(key) {
            c.remove();
            true
        } else {
            false
        }
    }

    /// Remove all entries.
    pub fn clear(&mut self, forest: &mut SetForest<K, C>) {
        // `take()` leaves `root` empty; the old tree's nodes go back on the free list.
        if let Some(root) = self.root.take() {
            forest.nodes.free_tree(root);
        }
    }

    /// Retains only the elements specified by the predicate.
    ///
    /// Remove all elements where the predicate returns false.
    pub fn retain<F>(&mut self, forest: &mut SetForest<K, C>, mut predicate: F)
    where
        F: FnMut(K) -> bool,
    {
        let mut path = Path::default();
        if let Some(root) = self.root.expand() {
            path.first(root, &forest.nodes);
        }
        // `path.remove` advances to the following element, so only `next` on keepers.
        while let Some((node, entry)) = path.leaf_pos() {
            if predicate(forest.nodes[node].unwrap_leaf().0[entry]) {
                path.next(&forest.nodes);
            } else {
                self.root = path.remove(&mut forest.nodes).into();
            }
        }
    }

    /// Create a cursor for navigating this set. The cursor is initially positioned off the end of
    /// the set.
    pub fn cursor<'a>(
        &'a mut self,
        forest: &'a mut SetForest<K, C>,
        comp: &'a C,
    ) -> SetCursor<'a, K, C> {
        SetCursor::new(self, forest, comp)
    }

    /// Create an iterator traversing this set. The iterator type is `K`.
    pub fn iter<'a>(&'a self, forest: &'a SetForest<K, C>) -> SetIter<'a, K, C> {
        SetIter {
            root: self.root,
            pool: &forest.nodes,
            path: Path::default(),
        }
    }
}
|
||||
|
||||
impl<K, C> Default for Set<K, C>
|
||||
where
|
||||
K: Copy,
|
||||
C: Comparator<K>,
|
||||
{
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// A position in a `Set` used to navigate and modify the ordered set.
///
/// A cursor always points at an element in the set, or "off the end" which is a position after the
/// last element in the set.
pub struct SetCursor<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // The set's root reference; updated in place when mutations change the root node.
    root: &'a mut PackedOption<Node>,
    pool: &'a mut NodePool<SetTypes<K, C>>,
    comp: &'a C,
    // The current position within the tree.
    path: Path<SetTypes<K, C>>,
}

impl<'a, K, C> SetCursor<'a, K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Create a cursor with a default (invalid) location.
    fn new(
        container: &'a mut Set<K, C>,
        forest: &'a mut SetForest<K, C>,
        comp: &'a C,
    ) -> SetCursor<'a, K, C> {
        SetCursor {
            root: &mut container.root,
            pool: &mut forest.nodes,
            comp,
            path: Path::default(),
        }
    }

    /// Is this cursor pointing to an empty set?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Move cursor to the next element and return it.
    ///
    /// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
    /// position.
    pub fn next(&mut self) -> Option<K> {
        self.path.next(self.pool).map(|(k, _)| k)
    }

    /// Move cursor to the previous element and return it.
    ///
    /// If the cursor is already pointing at the first element, leave it there and return `None`.
    pub fn prev(&mut self) -> Option<K> {
        self.root.expand().and_then(|root| {
            self.path.prev(root, self.pool).map(|(k, _)| k)
        })
    }

    /// Get the current element, or `None` if the cursor is at the end.
    pub fn elem(&self) -> Option<K> {
        self.path.leaf_pos().and_then(|(node, entry)| {
            self.pool[node].unwrap_leaf().0.get(entry).cloned()
        })
    }

    /// Move this cursor to `elem`.
    ///
    /// If `elem` is in the set, place the cursor at `elem` and return true.
    ///
    /// If `elem` is not in the set, place the cursor at the next larger element (or the end) and
    /// return false.
    pub fn goto(&mut self, elem: K) -> bool {
        match self.root.expand() {
            None => false,
            Some(root) => {
                if self.path.find(elem, root, self.pool, self.comp).is_some() {
                    true
                } else {
                    // `find` may leave the path one-past-the-end of a leaf; normalize so
                    // the cursor points at the next larger element or off-the-end.
                    self.path.normalize(self.pool);
                    false
                }
            }
        }
    }

    /// Move this cursor to the first element.
    pub fn goto_first(&mut self) -> Option<K> {
        self.root.map(|root| self.path.first(root, self.pool).0)
    }

    /// Try to insert `elem` into the set and leave the cursor at the inserted element.
    ///
    /// If the set did not contain `elem`, insert it and return true.
    ///
    /// If `elem` is already present, don't change the set, place the cursor at `goto(elem)`, and
    /// return false.
    pub fn insert(&mut self, elem: K) -> bool {
        match self.root.expand() {
            None => {
                // Empty set: the first leaf becomes the new root.
                let root = self.pool.alloc_node(NodeData::leaf(elem, SetValue()));
                *self.root = root.into();
                self.path.set_root_node(root);
                true
            }
            Some(root) => {
                // TODO: Optimize the case where `self.path` is already at the correct insert pos.
                if self.path.find(elem, root, self.pool, self.comp).is_none() {
                    *self.root = self.path.insert(elem, SetValue(), self.pool).into();
                    true
                } else {
                    false
                }
            }
        }
    }

    /// Remove the current element (if any) and return it.
    /// This advances the cursor to the next element after the removed one.
    pub fn remove(&mut self) -> Option<K> {
        let elem = self.elem();
        if elem.is_some() {
            // Removal can restructure the tree, so take the (possibly new) root back.
            *self.root = self.path.remove(self.pool).into();
        }
        elem
    }
}
|
||||
|
||||
#[cfg(test)]
impl<'a, K, C> SetCursor<'a, K, C>
where
    K: Copy + ::std::fmt::Display,
    C: Comparator<K>,
{
    /// Check internal consistency of both the cursor's path and the whole tree.
    fn verify(&self) {
        self.path.verify(self.pool);
        self.root.map(|root| self.pool.verify_tree(root, self.comp));
    }

    /// Get a text version of the path to the current position.
    fn tpath(&self) -> ::std::string::String {
        use std::string::ToString;
        self.path.to_string()
    }
}
|
||||
|
||||
/// An iterator visiting the elements of a `Set`.
pub struct SetIter<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // Holds the root until the first element has been produced, then cleared.
    root: PackedOption<Node>,
    pool: &'a NodePool<SetTypes<K, C>>,
    path: Path<SetTypes<K, C>>,
}

impl<'a, K, C> Iterator for SetIter<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    type Item = K;

    fn next(&mut self) -> Option<Self::Item> {
        // We use `self.root` to indicate if we need to go to the first element. Reset to `None`
        // once we've returned the first element. This also works for an empty tree since the
        // `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
        match self.root.take() {
            Some(root) => Some(self.path.first(root, self.pool).0),
            None => self.path.next(self.pool).map(|(k, _)| k),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::super::NodeData;
    use super::*;
    use std::mem;
    use std::vec::Vec;

    #[test]
    fn node_size() {
        // check that nodes are cache line sized when keys are 32 bits.
        type F = SetTypes<u32, ()>;
        assert_eq!(mem::size_of::<NodeData<F>>(), 64);
    }

    #[test]
    fn empty() {
        let mut f = SetForest::<u32, ()>::new();
        f.clear();

        let mut s = Set::<u32, ()>::new();
        assert!(s.is_empty());
        s.clear(&mut f);
        assert!(!s.contains(7, &f, &()));

        // Iterator for an empty set.
        assert_eq!(s.iter(&f).next(), None);

        // The retain predicate must never run on an empty set.
        s.retain(&mut f, |_| unreachable!());

        let mut c = SetCursor::new(&mut s, &mut f, &());
        c.verify();
        assert_eq!(c.elem(), None);

        assert_eq!(c.goto_first(), None);
        assert_eq!(c.tpath(), "<empty path>");
    }

    #[test]
    fn simple_cursor() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        assert!(c.insert(50));
        c.verify();
        assert_eq!(c.elem(), Some(50));

        assert!(c.insert(100));
        c.verify();
        assert_eq!(c.elem(), Some(100));

        assert!(c.insert(10));
        c.verify();
        assert_eq!(c.elem(), Some(10));

        // Basic movement.
        assert_eq!(c.next(), Some(50));
        assert_eq!(c.next(), Some(100));
        assert_eq!(c.next(), None);
        assert_eq!(c.next(), None);
        assert_eq!(c.prev(), Some(100));
        assert_eq!(c.prev(), Some(50));
        assert_eq!(c.prev(), Some(10));
        assert_eq!(c.prev(), None);
        assert_eq!(c.prev(), None);

        assert!(c.goto(50));
        assert_eq!(c.elem(), Some(50));
        assert_eq!(c.remove(), Some(50));
        c.verify();

        assert_eq!(c.elem(), Some(100));
        assert_eq!(c.remove(), Some(100));
        c.verify();
        assert_eq!(c.elem(), None);
        assert_eq!(c.remove(), None);
        c.verify();
    }

    #[test]
    fn two_level_sparse_tree() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        // Insert enough elements that we get a two-level tree.
        // Each leaf node holds 8 elements
        assert!(c.is_empty());
        for i in 0..50 {
            assert!(c.insert(i));
            assert_eq!(c.elem(), Some(i));
        }
        assert!(!c.is_empty());

        assert_eq!(c.goto_first(), Some(0));
        assert_eq!(c.tpath(), "node2[0]--node0[0]");

        assert_eq!(c.prev(), None);
        for i in 1..50 {
            assert_eq!(c.next(), Some(i));
        }
        assert_eq!(c.next(), None);
        for i in (0..50).rev() {
            assert_eq!(c.prev(), Some(i));
        }
        assert_eq!(c.prev(), None);

        // Delete forwards from the middle to the end.
        assert!(c.goto(25));
        for i in 25..50 {
            assert_eq!(c.remove(), Some(i));
            assert!(!c.is_empty());
            c.verify();
        }

        // Then delete backwards from the end to the front.
        for i in (0..25).rev() {
            assert!(!c.is_empty());
            assert_eq!(c.elem(), None);
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            c.verify();
        }
        assert_eq!(c.elem(), None);
        assert!(c.is_empty());
    }

    #[test]
    fn three_level_sparse_tree() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        // Insert enough elements that we get a 3-level tree.
        // Each leaf node holds 8 elements when filled up sequentially.
        // Inner nodes hold 8 node pointers.
        assert!(c.is_empty());
        for i in 0..150 {
            assert!(c.insert(i));
            assert_eq!(c.elem(), Some(i));
        }
        assert!(!c.is_empty());

        assert!(c.goto(0));
        assert_eq!(c.tpath(), "node11[0]--node2[0]--node0[0]");

        assert_eq!(c.prev(), None);
        for i in 1..150 {
            assert_eq!(c.next(), Some(i));
        }
        assert_eq!(c.next(), None);
        for i in (0..150).rev() {
            assert_eq!(c.prev(), Some(i));
        }
        assert_eq!(c.prev(), None);

        // Delete forwards from the middle to the end.
        assert!(c.goto(125));
        for i in 125..150 {
            assert_eq!(c.remove(), Some(i));
            assert!(!c.is_empty());
            c.verify();
        }

        // Then delete backwards from the end to the front.
        for i in (0..125).rev() {
            assert!(!c.is_empty());
            assert_eq!(c.elem(), None);
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            c.verify();
        }
        assert_eq!(c.elem(), None);
        assert!(c.is_empty());
    }

    // Generate a densely populated 4-level tree.
    //
    // Level 1: 1 root
    // Level 2: 8 inner
    // Level 3: 64 inner
    // Level 4: 512 leaves, up to 7680 elements
    //
    // A 3-level tree can hold at most 960 elements.
    fn dense4l(f: &mut SetForest<i32, ()>) -> Set<i32, ()> {
        f.clear();
        let mut s = Set::new();

        // Insert 4000 elements in 7 passes over the range to avoid the half-full leaf node
        // pattern that comes from sequential insertion. This will generate a normal leaf layer.
        for n in 0..4000 {
            assert!(s.insert((n * 7) % 4000, f, &()));
        }
        s
    }

    #[test]
    fn four_level() {
        let mut f = SetForest::<i32, ()>::new();
        let mut s = dense4l(&mut f);

        assert_eq!(
            s.iter(&f).collect::<Vec<_>>()[0..10],
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        );

        let mut c = s.cursor(&mut f, &());

        c.verify();

        // Peel off a whole sub-tree of the root by deleting from the front.
        // The 900 element is near the front of the second sub-tree.
        assert!(c.goto(900));
        assert_eq!(c.tpath(), "node48[1]--node47[0]--node26[0]--node20[4]");
        assert!(c.goto(0));
        for i in 0..900 {
            assert!(!c.is_empty());
            assert_eq!(c.remove(), Some(i));
        }
        c.verify();
        assert_eq!(c.elem(), Some(900));

        // Delete backwards from somewhere in the middle.
        assert!(c.goto(3000));
        for i in (2000..3000).rev() {
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            assert_eq!(c.elem(), Some(3000));
        }
        c.verify();

        // Remove everything in a scattered manner, triggering many collapsing patterns.
        for i in 0..4000 {
            if c.goto((i * 7) % 4000) {
                c.remove();
            }
        }
        assert!(c.is_empty());
    }

    #[test]
    fn four_level_clear() {
        let mut f = SetForest::<i32, ()>::new();
        let mut s = dense4l(&mut f);
        s.clear(&mut f);
    }
}
|
||||
133
lib/codegen/src/binemit/memorysink.rs
Normal file
133
lib/codegen/src/binemit/memorysink.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Code sink that writes binary machine code into contiguous memory.
|
||||
//!
|
||||
//! The `CodeSink` trait is the most general way of extracting binary machine code from Cretonne,
|
||||
//! and it is implemented by things like the `test binemit` file test driver to generate
|
||||
//! hexadecimal machine code. The `CodeSink` has some undesirable performance properties because of
|
||||
//! the dual abstraction: `TargetIsa` is a trait object implemented by each supported ISA, so it
|
||||
//! can't have any generic functions that could be specialized for each `CodeSink` implementation.
|
||||
//! This results in many virtual function callbacks (one per `put*` call) when
|
||||
//! `TargetIsa::emit_inst()` is used.
|
||||
//!
|
||||
//! The `MemoryCodeSink` type fixes the performance problem because it is a type known to
|
||||
//! `TargetIsa` so it can specialize its machine code generation for the type. The trade-off is
|
||||
//! that a `MemoryCodeSink` will always write binary machine code to raw memory. It forwards any
|
||||
//! relocations to a `RelocSink` trait object. Relocations are less frequent than the
|
||||
//! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe.
|
||||
|
||||
use super::{Addend, CodeOffset, CodeSink, Reloc};
|
||||
use ir::{ExternalName, JumpTable, SourceLoc, TrapCode};
|
||||
use std::ptr::write_unaligned;
|
||||
|
||||
/// A `CodeSink` that writes binary machine code directly into memory.
///
/// A `MemoryCodeSink` object should be used when emitting a Cretonne IR function into executable
/// memory. It writes machine code directly to a raw pointer without any bounds checking, so make
/// sure to allocate enough memory for the whole function. The number of bytes required is returned
/// by the `Context::compile()` function.
///
/// Any relocations in the function are forwarded to the `RelocSink` trait object.
///
/// Note that `MemoryCodeSink` writes multi-byte values in the native byte order of the host. This
/// is not the right thing to do for cross compilation.
pub struct MemoryCodeSink<'a> {
    // Base address the machine code is written to. Writes are not bounds
    // checked; the caller must have allocated enough memory (see type docs).
    data: *mut u8,
    // Byte offset of the next write relative to `data`. Also serves as the
    // current code offset reported to relocations and traps.
    offset: isize,
    // Receives relocation records as they are emitted.
    relocs: &'a mut RelocSink,
    // Receives trap code/location records as they are emitted.
    traps: &'a mut TrapSink,
}
|
||||
|
||||
impl<'a> MemoryCodeSink<'a> {
    /// Create a new memory code sink that writes a function to the memory pointed to by `data`.
    ///
    /// `data` must point to a writable allocation large enough for the whole function — emission
    /// performs no bounds checking. Relocations and traps encountered during emission are
    /// forwarded to `relocs` and `traps` respectively.
    pub fn new<'sink>(
        data: *mut u8,
        relocs: &'sink mut RelocSink,
        traps: &'sink mut TrapSink,
    ) -> MemoryCodeSink<'sink> {
        MemoryCodeSink {
            data,
            // Emission always starts at the beginning of the buffer.
            offset: 0,
            relocs,
            traps,
        }
    }
}
|
||||
|
||||
/// A trait for receiving relocations for code that is emitted directly into memory.
pub trait RelocSink {
    /// Add a relocation referencing an EBB at the current offset.
    ///
    /// The first `CodeOffset` is the location of the relocation itself; the second is the
    /// offset of the referenced EBB.
    fn reloc_ebb(&mut self, CodeOffset, Reloc, CodeOffset);

    /// Add a relocation referencing an external symbol at the current offset.
    ///
    /// The `Addend` is added to the symbol's value when the relocation is resolved.
    fn reloc_external(&mut self, CodeOffset, Reloc, &ExternalName, Addend);

    /// Add a relocation referencing a jump table.
    fn reloc_jt(&mut self, CodeOffset, Reloc, JumpTable);
}

/// A trait for receiving trap codes and offsets.
pub trait TrapSink {
    /// Add trap information for a specific offset.
    fn trap(&mut self, CodeOffset, SourceLoc, TrapCode);
}
|
||||
|
||||
impl<'a> CodeSink for MemoryCodeSink<'a> {
    fn offset(&self) -> CodeOffset {
        // `offset` starts at 0 and only ever grows, so the cast to the
        // unsigned `CodeOffset` type is lossless.
        self.offset as CodeOffset
    }

    fn put1(&mut self, x: u8) {
        // SAFETY (unverified here): relies on the contract of
        // `MemoryCodeSink::new` — the caller must have allocated enough
        // memory for the whole function. No bounds check is performed.
        unsafe {
            write_unaligned(self.data.offset(self.offset), x);
        }
        self.offset += 1;
    }

    fn put2(&mut self, x: u16) {
        // Multi-byte writes use `write_unaligned` in host byte order; see the
        // type-level note about cross compilation.
        unsafe {
            write_unaligned(self.data.offset(self.offset) as *mut u16, x);
        }
        self.offset += 2;
    }

    fn put4(&mut self, x: u32) {
        unsafe {
            write_unaligned(self.data.offset(self.offset) as *mut u32, x);
        }
        self.offset += 4;
    }

    fn put8(&mut self, x: u64) {
        unsafe {
            write_unaligned(self.data.offset(self.offset) as *mut u64, x);
        }
        self.offset += 8;
    }

    // The relocation/trap callbacks record the *current* emission offset and
    // forward to the sinks provided at construction time.

    fn reloc_ebb(&mut self, rel: Reloc, ebb_offset: CodeOffset) {
        let ofs = self.offset();
        self.relocs.reloc_ebb(ofs, rel, ebb_offset);
    }

    fn reloc_external(&mut self, rel: Reloc, name: &ExternalName, addend: Addend) {
        let ofs = self.offset();
        self.relocs.reloc_external(ofs, rel, name, addend);
    }

    fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) {
        let ofs = self.offset();
        self.relocs.reloc_jt(ofs, rel, jt);
    }

    fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) {
        let ofs = self.offset();
        self.traps.trap(ofs, srcloc, code);
    }
}
|
||||
|
||||
/// A `TrapSink` implementation that does nothing, which is convenient when
/// compiling code that does not rely on trapping semantics.
pub struct NullTrapSink {}

impl TrapSink for NullTrapSink {
    // All trap information is intentionally discarded.
    fn trap(&mut self, _offset: CodeOffset, _srcloc: SourceLoc, _code: TrapCode) {}
}
|
||||
121
lib/codegen/src/binemit/mod.rs
Normal file
121
lib/codegen/src/binemit/mod.rs
Normal file
@@ -0,0 +1,121 @@
|
||||
//! Binary machine code emission.
|
||||
//!
|
||||
//! The `binemit` module contains code for translating Cretonne's intermediate representation into
|
||||
//! binary machine code.
|
||||
|
||||
mod memorysink;
|
||||
mod relaxation;
|
||||
|
||||
pub use self::memorysink::{MemoryCodeSink, RelocSink, TrapSink, NullTrapSink};
|
||||
pub use self::relaxation::relax_branches;
|
||||
pub use regalloc::RegDiversions;
|
||||
|
||||
use ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
|
||||
use std::fmt;
|
||||
|
||||
/// Offset in bytes from the beginning of the function.
///
/// Cretonne can be used as a cross compiler, so we don't want to use a type like `usize` which
/// depends on the *host* platform, not the *target* platform.
pub type CodeOffset = u32;

/// Addend to add to the symbol value.
pub type Addend = i64;

/// Relocation kinds for every ISA
///
/// Each variant names a fixup that must be applied to the emitted code when linking.
#[derive(Copy, Clone, Debug)]
pub enum Reloc {
    /// absolute 4-byte
    Abs4,
    /// absolute 8-byte
    Abs8,
    /// x86 PC-relative 4-byte
    X86PCRel4,
    /// x86 GOT PC-relative 4-byte
    X86GOTPCRel4,
    /// x86 PLT-relative 4-byte
    X86PLTRel4,
    /// Arm32 call target
    Arm32Call,
    /// Arm64 call target
    Arm64Call,
    /// RISC-V call target
    RiscvCall,
}
|
||||
|
||||
impl fmt::Display for Reloc {
|
||||
/// Display trait implementation drops the arch, since its used in contexts where the arch is
|
||||
/// already unambigious, e.g. cton syntax with isa specified. In other contexts, use Debug.
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Reloc::Abs4 => write!(f, "{}", "Abs4"),
|
||||
Reloc::Abs8 => write!(f, "{}", "Abs8"),
|
||||
Reloc::X86PCRel4 => write!(f, "{}", "PCRel4"),
|
||||
Reloc::X86GOTPCRel4 => write!(f, "{}", "GOTPCRel4"),
|
||||
Reloc::X86PLTRel4 => write!(f, "{}", "PLTRel4"),
|
||||
Reloc::Arm32Call | Reloc::Arm64Call | Reloc::RiscvCall => write!(f, "{}", "Call"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Abstract interface for adding bytes to the code segment.
///
/// A `CodeSink` will receive all of the machine code for a function. It also accepts relocations
/// which are locations in the code section that need to be fixed up when linking.
///
/// Implementations are expected to track the current emission position and report it via
/// `offset()`; relocations and traps refer to that current offset.
pub trait CodeSink {
    /// Get the current position.
    fn offset(&self) -> CodeOffset;

    /// Add 1 byte to the code section.
    fn put1(&mut self, u8);

    /// Add 2 bytes to the code section.
    fn put2(&mut self, u16);

    /// Add 4 bytes to the code section.
    fn put4(&mut self, u32);

    /// Add 8 bytes to the code section.
    fn put8(&mut self, u64);

    /// Add a relocation referencing an EBB at the current offset.
    fn reloc_ebb(&mut self, Reloc, CodeOffset);

    /// Add a relocation referencing an external symbol plus the addend at the current offset.
    fn reloc_external(&mut self, Reloc, &ExternalName, Addend);

    /// Add a relocation referencing a jump table.
    fn reloc_jt(&mut self, Reloc, JumpTable);

    /// Add trap information for the current offset.
    fn trap(&mut self, TrapCode, SourceLoc);
}
|
||||
|
||||
/// Report a bad encoding error.
///
/// Panics unconditionally, showing the offending encoding and the instruction it was assigned
/// to. Marked `#[cold]` because it is only reached on an internal error path.
#[cold]
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
    panic!(
        "Bad encoding {} for {}",
        func.encodings[inst],
        func.dfg.display_inst(inst, None)
    );
}
|
||||
|
||||
/// Emit a function to `sink`, given an instruction emitter function.
|
||||
///
|
||||
/// This function is called from the `TargetIsa::emit_function()` implementations with the
|
||||
/// appropriate instruction emitter.
|
||||
pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS)
|
||||
where
|
||||
CS: CodeSink,
|
||||
EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS),
|
||||
{
|
||||
let mut divert = RegDiversions::new();
|
||||
for ebb in func.layout.ebbs() {
|
||||
divert.clear();
|
||||
debug_assert_eq!(func.offsets[ebb], sink.offset());
|
||||
for inst in func.layout.ebb_insts(ebb) {
|
||||
emit_inst(func, inst, &mut divert, sink);
|
||||
}
|
||||
}
|
||||
}
|
||||
198
lib/codegen/src/binemit/relaxation.rs
Normal file
198
lib/codegen/src/binemit/relaxation.rs
Normal file
@@ -0,0 +1,198 @@
|
||||
//! Branch relaxation and offset computation.
|
||||
//!
|
||||
//! # EBB header offsets
|
||||
//!
|
||||
//! Before we can generate binary machine code for branch instructions, we need to know the final
|
||||
//! offsets of all the EBB headers in the function. This information is encoded in the
|
||||
//! `func.offsets` table.
|
||||
//!
|
||||
//! # Branch relaxation
|
||||
//!
|
||||
//! Branch relaxation is the process of ensuring that all branches in the function have enough
|
||||
//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
|
||||
//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
|
||||
//!
|
||||
//! On RISC architectures, it can happen that conditional branches have a shorter range than
|
||||
//! unconditional branches:
|
||||
//!
|
||||
//! ```cton
|
||||
//! brz v1, ebb17
|
||||
//! ```
|
||||
//!
|
||||
//! can be transformed into:
|
||||
//!
|
||||
//! ```cton
|
||||
//! brnz v1, ebb23
|
||||
//! jump ebb17
|
||||
//! ebb23:
|
||||
//! ```
|
||||
|
||||
use binemit::CodeOffset;
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use ir::{Function, InstructionData, Opcode};
|
||||
use isa::{EncInfo, TargetIsa};
|
||||
use iterators::IteratorExtras;
|
||||
use result::CtonError;
|
||||
|
||||
/// Relax branches and compute the final layout of EBB headers in `func`.
///
/// Fill in the `func.offsets` table so the function is ready for binary emission.
///
/// Returns the total code size in bytes, or an error.
pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
    let encinfo = isa.encoding_info();

    // Clear all offsets so we can recognize EBBs that haven't been visited yet.
    func.offsets.clear();
    func.offsets.resize(func.dfg.num_ebbs());

    // Start by inserting fall through instructions.
    fallthroughs(func);

    let mut offset = 0;

    // The relaxation algorithm iterates to convergence: relaxing one branch
    // grows the code, which can push other branches out of range.
    let mut go_again = true;
    while go_again {
        go_again = false;
        offset = 0;

        // Visit all instructions in layout order
        let mut cur = FuncCursor::new(func);
        while let Some(ebb) = cur.next_ebb() {
            // Record the offset for `ebb` and make sure we iterate until offsets are stable.
            if cur.func.offsets[ebb] != offset {
                debug_assert!(
                    cur.func.offsets[ebb] < offset,
                    "Code shrinking during relaxation"
                );
                cur.func.offsets[ebb] = offset;
                // An offset moved, so branch ranges computed earlier in this
                // pass may be stale — schedule another pass.
                go_again = true;
            }

            while let Some(inst) = cur.next_inst() {
                let enc = cur.func.encodings[inst];
                let size = encinfo.bytes(enc);

                // See if this might be a branch that is out of range.
                if let Some(range) = encinfo.branch_range(enc) {
                    if let Some(dest) = cur.func.dfg[inst].branch_destination() {
                        let dest_offset = cur.func.offsets[dest];
                        // This could be an out-of-range branch.
                        // Relax it unless the destination offset has not been computed yet.
                        // (Offset 0 means "not yet visited" unless the destination is the
                        // entry block, which genuinely lives at offset 0.)
                        if !range.contains(offset, dest_offset) &&
                            (dest_offset != 0 || Some(dest) == cur.func.layout.entry_block())
                        {
                            offset += relax_branch(&mut cur, offset, dest_offset, &encinfo, isa);
                            continue;
                        }
                    }
                }

                offset += size;
            }
        }
    }

    Ok(offset)
}
|
||||
|
||||
/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
/// existing `fallthrough` instructions are correct.
///
/// Operates on adjacent EBB pairs in layout order; only the terminating `Jump` of each EBB is
/// considered, and its encoding is reset when it becomes a fall-through.
fn fallthroughs(func: &mut Function) {
    for (ebb, succ) in func.layout.ebbs().adjacent_pairs() {
        let term = func.layout.last_inst(ebb).expect("EBB has no terminator.");
        if let InstructionData::Jump {
            ref mut opcode,
            destination,
            ..
        } = func.dfg[term]
        {
            match *opcode {
                Opcode::Fallthrough => {
                    // Somebody used a fall-through instruction before the branch relaxation pass.
                    // Make sure it is correct, i.e. the destination is the layout successor.
                    debug_assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
                }
                Opcode::Jump => {
                    // If this is a jump to the successor EBB, change it to a fall-through.
                    if destination == succ {
                        *opcode = Opcode::Fallthrough;
                        // A fall-through emits no bytes, so drop the old encoding.
                        func.encodings[term] = Default::default();
                    }
                }
                _ => {}
            }
        }
    }
}
|
||||
|
||||
/// Relax the branch instruction at `pos` so it can cover the range `offset - dest_offset`.
///
/// Return the size of the replacement instructions up to and including the location where `pos` is
/// left.
///
/// Panics if no legal encoding with sufficient range and compatible operand constraints exists.
fn relax_branch(
    cur: &mut FuncCursor,
    offset: CodeOffset,
    dest_offset: CodeOffset,
    encinfo: &EncInfo,
    isa: &TargetIsa,
) -> CodeOffset {
    let inst = cur.current_inst().unwrap();
    dbg!(
        "Relaxing [{}] {} for {:#x}-{:#x} range",
        encinfo.display(cur.func.encodings[inst]),
        cur.func.dfg.display_inst(inst, isa),
        offset,
        dest_offset
    );

    // Pick the first encoding that can handle the branch range.
    let dfg = &cur.func.dfg;
    let ctrl_type = dfg.ctrl_typevar(inst);
    if let Some(enc) = isa.legal_encodings(cur.func, &dfg[inst], ctrl_type).find(
        |&enc| {
            let range = encinfo.branch_range(enc).expect("Branch with no range");
            if !range.contains(offset, dest_offset) {
                dbg!(" trying [{}]: out of range", encinfo.display(enc));
                false
            } else if encinfo.operand_constraints(enc) !=
                encinfo.operand_constraints(cur.func.encodings[inst])
            {
                // Conservatively give up if the encoding has different constraints
                // than the original, so that we don't risk picking a new encoding
                // which the existing operands don't satisfy. We can't check for
                // validity directly because we don't have a RegDiversions active so
                // we don't know which registers are actually in use.
                dbg!(" trying [{}]: constraints differ", encinfo.display(enc));
                false
            } else {
                dbg!(" trying [{}]: OK", encinfo.display(enc));
                true
            }
        },
    )
    {
        cur.func.encodings[inst] = enc;
        return encinfo.bytes(enc);
    }

    // Note: On some RISC ISAs, conditional branches have shorter range than unconditional
    // branches, so one way of extending the range of a conditional branch is to invert its
    // condition and make it branch over an unconditional jump which has the larger range.
    //
    // Splitting the EBB is problematic this late because there may be register diversions in
    // effect across the conditional branch, and they can't survive the control flow edge to a new
    // EBB. We have two options for handling that:
    //
    // 1. Set a flag on the new EBB that indicates it wants the preserve the register diversions of
    //    its layout predecessor, or
    // 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the EBB.
    //
    // It seems that 1. would allow us to share code among RISC ISAs that need this.
    //
    // We can't allow register diversions to survive from the layout predecessor because the layout
    // predecessor could contain kill points for some values that are live in this EBB, and
    // diversions are not automatically cancelled when the live range of a value ends.

    // This assumes solution 2. above:
    panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
}
|
||||
155
lib/codegen/src/bitset.rs
Normal file
155
lib/codegen/src/bitset.rs
Normal file
@@ -0,0 +1,155 @@
|
||||
//! Small Bitset
|
||||
//!
|
||||
//! This module defines a struct `BitSet<T>` encapsulating a bitset built over the type T.
|
||||
//! T is intended to be a primitive unsigned type. Currently it can be any type between u8 and u32
|
||||
//!
|
||||
//! If you would like to add support for larger bitsets in the future, you need to change the trait
|
||||
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.
|
||||
use std::convert::{From, Into};
|
||||
use std::mem::size_of;
|
||||
use std::ops::{Add, BitOr, Shl, Sub};
|
||||
|
||||
/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BitSet<T>(pub T);

impl<T> BitSet<T>
where
    T: Into<u32>
        + From<u8>
        + BitOr<T, Output = T>
        + Shl<u8, Output = T>
        + Sub<T, Output = T>
        + Add<T, Output = T>
        + PartialEq
        + Copy,
{
    /// Maximum number of bits supported by this BitSet instance
    pub fn bits() -> usize {
        size_of::<T>() * 8
    }

    /// Maximum number of bits supported by any bitset instance atm.
    pub fn max_bits() -> usize {
        size_of::<u32>() * 8
    }

    /// Check if this BitSet contains the number num
    pub fn contains(&self, num: u8) -> bool {
        debug_assert!((num as usize) < Self::bits());
        debug_assert!((num as usize) < Self::max_bits());
        // Widen the backing word to u32, then test the single bit.
        let word: u32 = self.0.into();
        word & (1u32 << num) != 0
    }

    /// Return the smallest number contained in the bitset or None if empty
    pub fn min(&self) -> Option<u8> {
        let word: u32 = self.0.into();
        if word == 0 {
            None
        } else {
            // The lowest set bit index is exactly the trailing-zero count.
            Some(word.trailing_zeros() as u8)
        }
    }

    /// Return the largest number contained in the bitset or None if empty
    pub fn max(&self) -> Option<u8> {
        let word: u32 = self.0.into();
        if word == 0 {
            return None;
        }
        // Highest set bit index, counted from the u32 width.
        let top = Self::max_bits() - word.leading_zeros() as usize - 1;
        Some(top as u8)
    }

    /// Construct a BitSet with the half-open range [lo,hi) filled in
    pub fn from_range(lo: u8, hi: u8) -> Self {
        debug_assert!(lo <= hi);
        debug_assert!((hi as usize) <= Self::bits());
        let one: T = T::from(1);
        // `(one << hi) - one` would shift-overflow when `hi == bits()`, so the
        // high mask is assembled from `hi - 1` instead.
        let upper_mask = if hi == 0 {
            T::from(0)
        } else {
            (one << (hi - 1)) + ((one << (hi - 1)) - one)
        };
        let lower_mask = (one << lo) - one;

        BitSet(upper_mask - lower_mask)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn contains() {
        // All eight bits set: every position 0..=7 must be present.
        // (The loop previously ran over `0..7`, leaving bit 7 untested.)
        let s = BitSet::<u8>(255);
        for i in 0..8 {
            assert!(s.contains(i));
        }

        // Empty set: no position may be present.
        let s1 = BitSet::<u8>(0);
        for i in 0..8 {
            assert!(!s1.contains(i));
        }

        // 127 = bits 0..=6 set, bit 7 clear.
        // (The loop previously ran over `0..6`, leaving bit 6 untested.)
        let s2 = BitSet::<u8>(127);
        for i in 0..7 {
            assert!(s2.contains(i));
        }
        assert!(!s2.contains(7));

        let s3 = BitSet::<u8>(2 | 4 | 64);
        assert!(!s3.contains(0) && !s3.contains(3) && !s3.contains(4));
        assert!(!s3.contains(5) && !s3.contains(7));
        assert!(s3.contains(1) && s3.contains(2) && s3.contains(6));

        let s4 = BitSet::<u16>(4 | 8 | 256 | 1024);
        assert!(
            !s4.contains(0) && !s4.contains(1) && !s4.contains(4) && !s4.contains(5) &&
                !s4.contains(6) && !s4.contains(7) && !s4.contains(9) && !s4.contains(11)
        );
        assert!(s4.contains(2) && s4.contains(3) && s4.contains(8) && s4.contains(10));
    }

    #[test]
    fn minmax() {
        let s = BitSet::<u8>(255);
        assert_eq!(s.min(), Some(0));
        assert_eq!(s.max(), Some(7));
        assert!(s.min() == Some(0) && s.max() == Some(7));
        let s1 = BitSet::<u8>(0);
        assert!(s1.min() == None && s1.max() == None);
        let s2 = BitSet::<u8>(127);
        assert!(s2.min() == Some(0) && s2.max() == Some(6));
        let s3 = BitSet::<u8>(2 | 4 | 64);
        assert!(s3.min() == Some(1) && s3.max() == Some(6));
        let s4 = BitSet::<u16>(4 | 8 | 256 | 1024);
        assert!(s4.min() == Some(2) && s4.max() == Some(10));
    }

    #[test]
    fn from_range() {
        // Empty half-open range.
        let s = BitSet::<u8>::from_range(5, 5);
        assert!(s.0 == 0);

        // Full-width range exercises the shift-overflow workaround.
        let s = BitSet::<u8>::from_range(0, 8);
        assert!(s.0 == 255);

        let s = BitSet::<u16>::from_range(0, 8);
        assert!(s.0 == 255u16);

        let s = BitSet::<u16>::from_range(0, 16);
        assert!(s.0 == 65535u16);

        let s = BitSet::<u8>::from_range(5, 6);
        assert!(s.0 == 32u8);

        let s = BitSet::<u8>::from_range(3, 7);
        assert!(s.0 == 8 | 16 | 32 | 64);

        let s = BitSet::<u16>::from_range(5, 11);
        assert!(s.0 == 32 | 64 | 128 | 256 | 512 | 1024);
    }
}
|
||||
76
lib/codegen/src/cfg_printer.rs
Normal file
76
lib/codegen/src/cfg_printer.rs
Normal file
@@ -0,0 +1,76 @@
|
||||
//! The `CFGPrinter` utility.
|
||||
|
||||
use std::fmt::{Display, Formatter, Result, Write};
|
||||
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::Function;
|
||||
use ir::instructions::BranchInfo;
|
||||
|
||||
/// A utility for pretty-printing the CFG of a `Function`.
pub struct CFGPrinter<'a> {
    // The function whose CFG is printed.
    func: &'a Function,
    // The control flow graph, computed from `func` at construction time.
    cfg: ControlFlowGraph,
}
|
||||
|
||||
/// Pretty-printing methods that emit the CFG in graphviz dot format.
impl<'a> CFGPrinter<'a> {
    /// Create a new CFGPrinter.
    ///
    /// Computes the control flow graph of `func` up front.
    pub fn new(func: &'a Function) -> CFGPrinter<'a> {
        CFGPrinter {
            func,
            cfg: ControlFlowGraph::with_function(func),
        }
    }

    /// Write the CFG for this function to `w`.
    ///
    /// The output is graphviz dot: a `digraph` header, one record-shaped node per EBB, and one
    /// edge per CFG predecessor, followed by the closing brace.
    pub fn write(&self, w: &mut Write) -> Result {
        self.header(w)?;
        self.ebb_nodes(w)?;
        self.cfg_connections(w)?;
        writeln!(w, "}}")
    }

    // Emit the opening `digraph` line; the entry block (if any) is pinned to
    // the minimum rank so it renders at the top.
    fn header(&self, w: &mut Write) -> Result {
        writeln!(w, "digraph \"{}\" {{", self.func.name)?;
        if let Some(entry) = self.func.layout.entry_block() {
            writeln!(w, " {{rank=min; {}}}", entry)?;
        }
        Ok(())
    }

    // Emit one record-shaped node per EBB, listing its outgoing branch
    // instructions so edges can attach to the exact branch.
    fn ebb_nodes(&self, w: &mut Write) -> Result {
        for ebb in &self.func.layout {
            write!(w, " {} [shape=record, label=\"{{{}", ebb, ebb)?;
            // Add all outgoing branch instructions to the label.
            for inst in self.func.layout.ebb_insts(ebb) {
                let idata = &self.func.dfg[inst];
                match idata.analyze_branch(&self.func.dfg.value_lists) {
                    BranchInfo::SingleDest(dest, _) => {
                        write!(w, " | <{}>{} {}", inst, idata.opcode(), dest)?
                    }
                    BranchInfo::Table(table) => {
                        write!(w, " | <{}>{} {}", inst, idata.opcode(), table)?
                    }
                    BranchInfo::NotABranch => {}
                }
            }
            writeln!(w, "}}\"]")?
        }
        Ok(())
    }

    // Emit one edge per CFG predecessor, anchored at the branch instruction's
    // port in the predecessor's record node.
    fn cfg_connections(&self, w: &mut Write) -> Result {
        for ebb in &self.func.layout {
            for (parent, inst) in self.cfg.pred_iter(ebb) {
                writeln!(w, " {}:{} -> {}", parent, inst, ebb)?;
            }
        }
        Ok(())
    }
}
|
||||
|
||||
impl<'a> Display for CFGPrinter<'a> {
    // Delegates to `write`, so the dot output can also be produced via
    // `format!`/`to_string`.
    fn fmt(&self, f: &mut Formatter) -> Result {
        self.write(f)
    }
}
|
||||
78
lib/codegen/src/constant_hash.rs
Normal file
78
lib/codegen/src/constant_hash.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
//! Runtime support for precomputed constant hash tables.
|
||||
//!
|
||||
//! The `lib/codegen/meta/constant_hash.py` Python module can generate constant hash tables using
|
||||
//! open addressing and quadratic probing. The hash tables are arrays that are guaranteed to:
|
||||
//!
|
||||
//! - Have a power-of-two size.
|
||||
//! - Contain at least one empty slot.
|
||||
//!
|
||||
//! This module provides runtime support for lookups in these tables.
|
||||
|
||||
/// Trait that must be implemented by the entries in a constant hash table.
pub trait Table<K: Copy + Eq> {
    /// Get the number of entries in this table which must be a power of two.
    fn len(&self) -> usize;

    /// Get the key corresponding to the entry at `idx`, or `None` if the entry is empty.
    /// The `idx` must be in range.
    fn key(&self, idx: usize) -> Option<K>;
}

/// Look for `key` in `table`.
///
/// The provided `hash` value must have been computed from `key` using the same hash function that
/// was used to construct the table.
///
/// Returns `Ok(idx)` with the table index containing the found entry, or `Err(idx)` with the empty
/// sentinel entry if no entry could be found.
pub fn probe<K: Copy + Eq, T: Table<K> + ?Sized>(
    table: &T,
    key: K,
    hash: usize,
) -> Result<usize, usize> {
    debug_assert!(table.len().is_power_of_two());
    // Power-of-two size lets us reduce indices with a bit mask.
    let mask = table.len() - 1;

    let mut slot = hash;
    let mut stride = 0;

    loop {
        slot &= mask;

        match table.key(slot) {
            // An empty slot terminates the probe sequence: the key is absent.
            None => return Err(slot),
            Some(k) => {
                if k == key {
                    return Ok(slot);
                }
            }
        }

        // Quadratic probing: advance by an ever-growing stride.
        stride += 1;
        // When `table.len()` is a power of two, the probe sequence provably
        // visits every slot, so this loop terminates as long as the table has
        // at least one unused entry.
        debug_assert!(stride < table.len());
        slot += stride;
    }
}
|
||||
|
||||
/// A primitive hash function for matching opcodes.
/// Must match `lib/codegen/meta/constant_hash.py`.
pub fn simple_hash(s: &str) -> usize {
    // DJB2-style mix: xor in each character, then add a 6-bit right rotation
    // of the running state. Must stay in lock-step with the Python version.
    let state = s.chars().fold(5381u32, |h, c| {
        (h ^ c as u32).wrapping_add(h.rotate_right(6))
    });
    state as usize
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::simple_hash;

    #[test]
    fn basic() {
        // c.f. `meta/constant_hash.py` tests.
        // These constants pin the hash to the Python implementation; if either
        // side changes, precomputed tables would no longer match.
        assert_eq!(simple_hash("Hello"), 0x2fa70c01);
        assert_eq!(simple_hash("world"), 0x5b0c31d5);
    }
}
|
||||
272
lib/codegen/src/context.rs
Normal file
272
lib/codegen/src/context.rs
Normal file
@@ -0,0 +1,272 @@
|
||||
//! Cretonne compilation context and main entry point.
|
||||
//!
|
||||
//! When compiling many small functions, it is important to avoid repeatedly allocating and
|
||||
//! deallocating the data structures needed for compilation. The `Context` struct is used to hold
|
||||
//! on to memory allocations between function compilations.
|
||||
//!
|
||||
//! The context does not hold a `TargetIsa` instance which has to be provided as an argument
|
||||
//! instead. This is because an ISA instance is immutable and can be used by multiple compilation
|
||||
//! contexts concurrently. Typically, you would have one context per compilation thread and only a
|
||||
//! single ISA instance.
|
||||
|
||||
use binemit::{relax_branches, CodeOffset, MemoryCodeSink, RelocSink, TrapSink};
|
||||
use dce::do_dce;
|
||||
use dominator_tree::DominatorTree;
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::Function;
|
||||
use isa::TargetIsa;
|
||||
use legalize_function;
|
||||
use licm::do_licm;
|
||||
use loop_analysis::LoopAnalysis;
|
||||
use postopt::do_postopt;
|
||||
use preopt::do_preopt;
|
||||
use regalloc;
|
||||
use result::{CtonError, CtonResult};
|
||||
use settings::{FlagsOrIsa, OptLevel};
|
||||
use simple_gvn::do_simple_gvn;
|
||||
use timing;
|
||||
use unreachable_code::eliminate_unreachable_code;
|
||||
use verifier;
|
||||
|
||||
/// Persistent data structures and compilation pipeline.
///
/// Fields are public so driver code can run and inspect individual passes.
pub struct Context {
    /// The function we're compiling.
    pub func: Function,

    /// The control flow graph of `func`.
    pub cfg: ControlFlowGraph,

    /// Dominator tree for `func`.
    pub domtree: DominatorTree,

    /// Register allocation context.
    pub regalloc: regalloc::Context,

    /// Loop analysis of `func`.
    pub loop_analysis: LoopAnalysis,
}
|
||||
|
||||
impl Context {
|
||||
    /// Allocate a new compilation context.
    ///
    /// The returned instance should be reused for compiling multiple functions in order to avoid
    /// needless allocator thrashing.
    pub fn new() -> Self {
        // Start from an empty function; `for_function` allocates the rest.
        Context::for_function(Function::new())
    }
|
||||
|
||||
/// Allocate a new compilation context with an existing Function.
|
||||
///
|
||||
/// The returned instance should be reused for compiling multiple functions in order to avoid
|
||||
/// needless allocator thrashing.
|
||||
pub fn for_function(func: Function) -> Self {
|
||||
Self {
|
||||
func: func,
|
||||
cfg: ControlFlowGraph::new(),
|
||||
domtree: DominatorTree::new(),
|
||||
regalloc: regalloc::Context::new(),
|
||||
loop_analysis: LoopAnalysis::new(),
|
||||
}
|
||||
}
|
||||
|
||||
    /// Clear all data structures in this context.
    ///
    /// Leaves allocations in place so the context can be reused for the next
    /// function without reallocating.
    pub fn clear(&mut self) {
        self.func.clear();
        self.cfg.clear();
        self.domtree.clear();
        self.regalloc.clear();
        self.loop_analysis.clear();
    }
|
||||
|
||||
    /// Compile the function.
    ///
    /// Run the function through all the passes necessary to generate code for the target ISA
    /// represented by `isa`. This does not include the final step of emitting machine code into a
    /// code sink.
    ///
    /// Returns the size of the function's code.
    pub fn compile(&mut self, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
        let _tt = timing::compile();
        self.verify_if(isa)?;

        self.compute_cfg();
        // Pre/post-legalization rewrites are skipped at the fastest opt level.
        if isa.flags().opt_level() != OptLevel::Fastest {
            self.preopt(isa)?;
        }
        self.legalize(isa)?;
        if isa.flags().opt_level() != OptLevel::Fastest {
            self.postopt(isa)?;
        }
        // LICM and GVN only run at the `Best` level; both need the dominator
        // tree and loop analysis computed first.
        if isa.flags().opt_level() == OptLevel::Best {
            self.compute_domtree();
            self.compute_loop_analysis();
            self.licm(isa)?;
            self.simple_gvn(isa)?;
        }
        // Recompute the domtree: legalization clears it because it may have
        // mutated the CFG.
        self.compute_domtree();
        self.eliminate_unreachable_code(isa)?;
        if isa.flags().opt_level() != OptLevel::Fastest {
            self.dce(isa)?;
        }
        self.regalloc(isa)?;
        self.prologue_epilogue(isa)?;
        // Branch relaxation computes final EBB offsets and yields the total
        // code size.
        self.relax_branches(isa)
    }
|
||||
|
||||
    /// Emit machine code directly into raw memory.
    ///
    /// Write all of the function's machine code to the memory at `mem`. The size of the machine
    /// code is returned by `compile` above.
    ///
    /// `mem` must point to a writable allocation at least that large; `MemoryCodeSink` performs
    /// no bounds checking.
    ///
    /// The machine code is not relocated. Instead, any relocations are emitted into `relocs`.
    pub fn emit_to_memory(
        &self,
        mem: *mut u8,
        relocs: &mut RelocSink,
        traps: &mut TrapSink,
        isa: &TargetIsa,
    ) {
        let _tt = timing::binemit();
        isa.emit_function(&self.func, &mut MemoryCodeSink::new(mem, relocs, traps));
    }
|
||||
|
||||
    /// Run the verifier on the function.
    ///
    /// Also check that the dominator tree and control flow graph are consistent with the function.
    pub fn verify<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> verifier::Result {
        verifier::verify_context(&self.func, &self.cfg, &self.domtree, fisa)
    }

    /// Run the verifier only if the `enable_verifier` setting is true.
    pub fn verify_if<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> CtonResult {
        let fisa = fisa.into();
        if fisa.flags.enable_verifier() {
            // Convert the verifier error into the generic `CtonError`.
            self.verify(fisa).map_err(Into::into)
        } else {
            Ok(())
        }
    }
|
||||
|
||||
/// Run the locations verifier on the function.
|
||||
pub fn verify_locations(&self, isa: &TargetIsa) -> verifier::Result {
|
||||
verifier::verify_locations(isa, &self.func, None)
|
||||
}
|
||||
|
||||
/// Run the locations verifier only if the `enable_verifier` setting is true.
|
||||
pub fn verify_locations_if(&self, isa: &TargetIsa) -> CtonResult {
|
||||
if isa.flags().enable_verifier() {
|
||||
self.verify_locations(isa).map_err(Into::into)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform dead-code elimination on the function.
|
||||
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
|
||||
do_dce(&mut self.func, &mut self.domtree);
|
||||
self.verify_if(fisa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform pre-legalization rewrites on the function.
|
||||
pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
do_preopt(&mut self.func);
|
||||
self.verify_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the legalizer for `isa` on the function.
|
||||
pub fn legalize(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
// Legalization invalidates the domtree and loop_analysis by mutating the CFG.
|
||||
// TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
|
||||
self.domtree.clear();
|
||||
self.loop_analysis.clear();
|
||||
legalize_function(&mut self.func, &mut self.cfg, isa);
|
||||
self.verify_if(isa)
|
||||
}
|
||||
|
||||
/// Perform post-legalization rewrites on the function.
|
||||
pub fn postopt(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
do_postopt(&mut self.func, isa);
|
||||
self.verify_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute the control flow graph.
|
||||
pub fn compute_cfg(&mut self) {
|
||||
self.cfg.compute(&self.func)
|
||||
}
|
||||
|
||||
/// Compute dominator tree.
|
||||
pub fn compute_domtree(&mut self) {
|
||||
self.domtree.compute(&self.func, &self.cfg)
|
||||
}
|
||||
|
||||
/// Compute the loop analysis.
|
||||
pub fn compute_loop_analysis(&mut self) {
|
||||
self.loop_analysis.compute(
|
||||
&self.func,
|
||||
&self.cfg,
|
||||
&self.domtree,
|
||||
)
|
||||
}
|
||||
|
||||
/// Compute the control flow graph and dominator tree.
|
||||
pub fn flowgraph(&mut self) {
|
||||
self.compute_cfg();
|
||||
self.compute_domtree()
|
||||
}
|
||||
|
||||
/// Perform simple GVN on the function.
|
||||
pub fn simple_gvn<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
|
||||
do_simple_gvn(&mut self.func, &mut self.domtree);
|
||||
self.verify_if(fisa)
|
||||
}
|
||||
|
||||
/// Perform LICM on the function.
|
||||
pub fn licm<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
|
||||
do_licm(
|
||||
&mut self.func,
|
||||
&mut self.cfg,
|
||||
&mut self.domtree,
|
||||
&mut self.loop_analysis,
|
||||
);
|
||||
self.verify_if(fisa)
|
||||
}
|
||||
|
||||
/// Perform unreachable code elimination.
|
||||
pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CtonResult
|
||||
where
|
||||
FOI: Into<FlagsOrIsa<'a>>,
|
||||
{
|
||||
eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree);
|
||||
self.verify_if(fisa)
|
||||
}
|
||||
|
||||
/// Run the register allocator.
|
||||
pub fn regalloc(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
self.regalloc.run(
|
||||
isa,
|
||||
&mut self.func,
|
||||
&self.cfg,
|
||||
&mut self.domtree,
|
||||
)
|
||||
}
|
||||
|
||||
/// Insert prologue and epilogues after computing the stack frame layout.
|
||||
pub fn prologue_epilogue(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
isa.prologue_epilogue(&mut self.func)?;
|
||||
self.verify_if(isa)?;
|
||||
self.verify_locations_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the branch relaxation pass and return the final code size.
|
||||
pub fn relax_branches(&mut self, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
|
||||
let code_size = relax_branches(&mut self.func, isa)?;
|
||||
self.verify_if(isa)?;
|
||||
self.verify_locations_if(isa)?;
|
||||
|
||||
Ok(code_size)
|
||||
}
|
||||
}
|
||||
760
lib/codegen/src/cursor.rs
Normal file
760
lib/codegen/src/cursor.rs
Normal file
@@ -0,0 +1,760 @@
|
||||
//! Cursor library.
|
||||
//!
|
||||
//! This module defines cursor data types that can be used for inserting instructions.
|
||||
|
||||
use ir;
|
||||
use isa::TargetIsa;
|
||||
|
||||
/// The possible positions of a cursor.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
|
||||
pub enum CursorPosition {
|
||||
/// Cursor is not pointing anywhere. No instructions can be inserted.
|
||||
Nowhere,
|
||||
/// Cursor is pointing at an existing instruction.
|
||||
/// New instructions will be inserted *before* the current instruction.
|
||||
At(ir::Inst),
|
||||
/// Cursor is before the beginning of an EBB. No instructions can be inserted. Calling
|
||||
/// `next_inst()` will move to the first instruction in the EBB.
|
||||
Before(ir::Ebb),
|
||||
/// Cursor is pointing after the end of an EBB.
|
||||
/// New instructions will be appended to the EBB.
|
||||
After(ir::Ebb),
|
||||
}
|
||||
|
||||
/// All cursor types implement the `Cursor` which provides common navigation operations.
|
||||
pub trait Cursor {
|
||||
/// Get the current cursor position.
|
||||
fn position(&self) -> CursorPosition;
|
||||
|
||||
/// Set the current position.
|
||||
fn set_position(&mut self, pos: CursorPosition);
|
||||
|
||||
/// Get the source location that should be assigned to new instructions.
|
||||
fn srcloc(&self) -> ir::SourceLoc;
|
||||
|
||||
/// Set the source location that should be assigned to new instructions.
|
||||
fn set_srcloc(&mut self, srcloc: ir::SourceLoc);
|
||||
|
||||
/// Borrow a reference to the function layout that this cursor is navigating.
|
||||
fn layout(&self) -> &ir::Layout;
|
||||
|
||||
/// Borrow a mutable reference to the function layout that this cursor is navigating.
|
||||
fn layout_mut(&mut self) -> &mut ir::Layout;
|
||||
|
||||
/// Exchange this cursor for one with a set source location.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, SourceLoc};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, srcloc: SourceLoc) {
|
||||
/// let mut pos = FuncCursor::new(func).with_srcloc(srcloc);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn with_srcloc(mut self, srcloc: ir::SourceLoc) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.set_srcloc(srcloc);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at `pos`.
|
||||
fn at_position(mut self, pos: CursorPosition) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.set_position(pos);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at `inst`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, inst: Inst) {
|
||||
/// let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_inst(mut self, inst: ir::Inst) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_inst(inst);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the first insertion point for `ebb`.
|
||||
/// This differs from `at_first_inst` in that it doesn't assume that any
|
||||
/// instructions have been inserted into `ebb` yet.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_first_insertion_point(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_first_insertion_point(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_first_insertion_point(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the first instruction in `ebb`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_first_inst(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_first_inst(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_first_inst(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the last instruction in `ebb`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_last_inst(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_last_inst(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_last_inst(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned after `inst`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, inst: Inst) {
|
||||
/// let mut pos = FuncCursor::new(func).after_inst(inst);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn after_inst(mut self, inst: ir::Inst) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_after_inst(inst);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the top of `ebb`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_top(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_top(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_top(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the bottom of `ebb`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_bottom(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_bottom(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_bottom(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the EBB corresponding to the current position.
|
||||
fn current_ebb(&self) -> Option<ir::Ebb> {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
Nowhere => None,
|
||||
At(inst) => self.layout().inst_ebb(inst),
|
||||
Before(ebb) | After(ebb) => Some(ebb),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the instruction corresponding to the current position, if any.
|
||||
fn current_inst(&self) -> Option<ir::Inst> {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
At(inst) => Some(inst),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Go to the position after a specific instruction, which must be inserted
|
||||
/// in the layout. New instructions will be inserted after `inst`.
|
||||
fn goto_after_inst(&mut self, inst: ir::Inst) {
|
||||
debug_assert!(self.layout().inst_ebb(inst).is_some());
|
||||
let new_pos = if let Some(next) = self.layout().next_inst(inst) {
|
||||
CursorPosition::At(next)
|
||||
} else {
|
||||
CursorPosition::After(self.layout().inst_ebb(inst).expect(
|
||||
"current instruction removed?",
|
||||
))
|
||||
};
|
||||
self.set_position(new_pos);
|
||||
}
|
||||
|
||||
/// Go to a specific instruction which must be inserted in the layout.
|
||||
/// New instructions will be inserted before `inst`.
|
||||
fn goto_inst(&mut self, inst: ir::Inst) {
|
||||
debug_assert!(self.layout().inst_ebb(inst).is_some());
|
||||
self.set_position(CursorPosition::At(inst));
|
||||
}
|
||||
|
||||
/// Go to the position for inserting instructions at the beginning of `ebb`,
|
||||
/// which unlike `goto_first_inst` doesn't assume that any instructions have
|
||||
/// been inserted into `ebb` yet.
|
||||
fn goto_first_insertion_point(&mut self, ebb: ir::Ebb) {
|
||||
if let Some(inst) = self.layout().first_inst(ebb) {
|
||||
self.goto_inst(inst);
|
||||
} else {
|
||||
self.goto_bottom(ebb);
|
||||
}
|
||||
}
|
||||
|
||||
/// Go to the first instruction in `ebb`.
|
||||
fn goto_first_inst(&mut self, ebb: ir::Ebb) {
|
||||
let inst = self.layout().first_inst(ebb).expect("Empty EBB");
|
||||
self.goto_inst(inst);
|
||||
}
|
||||
|
||||
/// Go to the last instruction in `ebb`.
|
||||
fn goto_last_inst(&mut self, ebb: ir::Ebb) {
|
||||
let inst = self.layout().last_inst(ebb).expect("Empty EBB");
|
||||
self.goto_inst(inst);
|
||||
}
|
||||
|
||||
/// Go to the top of `ebb` which must be inserted into the layout.
|
||||
/// At this position, instructions cannot be inserted, but `next_inst()` will move to the first
|
||||
/// instruction in `ebb`.
|
||||
fn goto_top(&mut self, ebb: ir::Ebb) {
|
||||
debug_assert!(self.layout().is_ebb_inserted(ebb));
|
||||
self.set_position(CursorPosition::Before(ebb));
|
||||
}
|
||||
|
||||
/// Go to the bottom of `ebb` which must be inserted into the layout.
|
||||
/// At this position, inserted instructions will be appended to `ebb`.
|
||||
fn goto_bottom(&mut self, ebb: ir::Ebb) {
|
||||
debug_assert!(self.layout().is_ebb_inserted(ebb));
|
||||
self.set_position(CursorPosition::After(ebb));
|
||||
}
|
||||
|
||||
/// Go to the top of the next EBB in layout order and return it.
|
||||
///
|
||||
/// - If the cursor wasn't pointing at anything, go to the top of the first EBB in the
|
||||
/// function.
|
||||
/// - If there are no more EBBs, leave the cursor pointing at nothing and return `None`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// The `next_ebb()` method is intended for iterating over the EBBs in layout order:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function) {
|
||||
/// let mut cursor = FuncCursor::new(func);
|
||||
/// while let Some(ebb) = cursor.next_ebb() {
|
||||
/// // Edit ebb.
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
fn next_ebb(&mut self) -> Option<ir::Ebb> {
|
||||
let next = if let Some(ebb) = self.current_ebb() {
|
||||
self.layout().next_ebb(ebb)
|
||||
} else {
|
||||
self.layout().entry_block()
|
||||
};
|
||||
self.set_position(match next {
|
||||
Some(ebb) => CursorPosition::Before(ebb),
|
||||
None => CursorPosition::Nowhere,
|
||||
});
|
||||
next
|
||||
}
|
||||
|
||||
/// Go to the bottom of the previous EBB in layout order and return it.
|
||||
///
|
||||
/// - If the cursor wasn't pointing at anything, go to the bottom of the last EBB in the
|
||||
/// function.
|
||||
/// - If there are no more EBBs, leave the cursor pointing at nothing and return `None`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// The `prev_ebb()` method is intended for iterating over the EBBs in backwards layout order:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function) {
|
||||
/// let mut cursor = FuncCursor::new(func);
|
||||
/// while let Some(ebb) = cursor.prev_ebb() {
|
||||
/// // Edit ebb.
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
fn prev_ebb(&mut self) -> Option<ir::Ebb> {
|
||||
let prev = if let Some(ebb) = self.current_ebb() {
|
||||
self.layout().prev_ebb(ebb)
|
||||
} else {
|
||||
self.layout().last_ebb()
|
||||
};
|
||||
self.set_position(match prev {
|
||||
Some(ebb) => CursorPosition::After(ebb),
|
||||
None => CursorPosition::Nowhere,
|
||||
});
|
||||
prev
|
||||
}
|
||||
|
||||
/// Move to the next instruction in the same EBB and return it.
|
||||
///
|
||||
/// - If the cursor was positioned before an EBB, go to the first instruction in that EBB.
|
||||
/// - If there are no more instructions in the EBB, go to the `After(ebb)` position and return
|
||||
/// `None`.
|
||||
/// - If the cursor wasn't pointing anywhere, keep doing that.
|
||||
///
|
||||
/// This method will never move the cursor to a different EBB.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// The `next_inst()` method is intended for iterating over the instructions in an EBB like
|
||||
/// this:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_ebb(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut cursor = FuncCursor::new(func).at_top(ebb);
|
||||
/// while let Some(inst) = cursor.next_inst() {
|
||||
/// // Edit instructions...
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
/// The loop body can insert and remove instructions via the cursor.
|
||||
///
|
||||
/// Iterating over all the instructions in a function looks like this:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function) {
|
||||
/// let mut cursor = FuncCursor::new(func);
|
||||
/// while let Some(ebb) = cursor.next_ebb() {
|
||||
/// while let Some(inst) = cursor.next_inst() {
|
||||
/// // Edit instructions...
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
fn next_inst(&mut self) -> Option<ir::Inst> {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
Nowhere | After(..) => None,
|
||||
At(inst) => {
|
||||
if let Some(next) = self.layout().next_inst(inst) {
|
||||
self.set_position(At(next));
|
||||
Some(next)
|
||||
} else {
|
||||
let pos = After(self.layout().inst_ebb(inst).expect(
|
||||
"current instruction removed?",
|
||||
));
|
||||
self.set_position(pos);
|
||||
None
|
||||
}
|
||||
}
|
||||
Before(ebb) => {
|
||||
if let Some(next) = self.layout().first_inst(ebb) {
|
||||
self.set_position(At(next));
|
||||
Some(next)
|
||||
} else {
|
||||
self.set_position(After(ebb));
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Move to the previous instruction in the same EBB and return it.
|
||||
///
|
||||
/// - If the cursor was positioned after an EBB, go to the last instruction in that EBB.
|
||||
/// - If there are no more instructions in the EBB, go to the `Before(ebb)` position and return
|
||||
/// `None`.
|
||||
/// - If the cursor wasn't pointing anywhere, keep doing that.
|
||||
///
|
||||
/// This method will never move the cursor to a different EBB.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// The `prev_inst()` method is intended for iterating backwards over the instructions in an
|
||||
/// EBB like this:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_ebb(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut cursor = FuncCursor::new(func).at_bottom(ebb);
|
||||
/// while let Some(inst) = cursor.prev_inst() {
|
||||
/// // Edit instructions...
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
fn prev_inst(&mut self) -> Option<ir::Inst> {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
Nowhere | Before(..) => None,
|
||||
At(inst) => {
|
||||
if let Some(prev) = self.layout().prev_inst(inst) {
|
||||
self.set_position(At(prev));
|
||||
Some(prev)
|
||||
} else {
|
||||
let pos = Before(self.layout().inst_ebb(inst).expect(
|
||||
"current instruction removed?",
|
||||
));
|
||||
self.set_position(pos);
|
||||
None
|
||||
}
|
||||
}
|
||||
After(ebb) => {
|
||||
if let Some(prev) = self.layout().last_inst(ebb) {
|
||||
self.set_position(At(prev));
|
||||
Some(prev)
|
||||
} else {
|
||||
self.set_position(Before(ebb));
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert an instruction at the current position.
|
||||
///
|
||||
/// - If pointing at an instruction, the new instruction is inserted before the current
|
||||
/// instruction.
|
||||
/// - If pointing at the bottom of an EBB, the new instruction is appended to the EBB.
|
||||
/// - Otherwise panic.
|
||||
///
|
||||
/// In either case, the cursor is not moved, such that repeated calls to `insert_inst()` causes
|
||||
/// instructions to appear in insertion order in the EBB.
|
||||
fn insert_inst(&mut self, inst: ir::Inst) {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
Nowhere | Before(..) => panic!("Invalid insert_inst position"),
|
||||
At(cur) => self.layout_mut().insert_inst(inst, cur),
|
||||
After(ebb) => self.layout_mut().append_inst(inst, ebb),
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove the instruction under the cursor.
|
||||
///
|
||||
/// The cursor is left pointing at the position following the current instruction.
|
||||
///
|
||||
/// Return the instruction that was removed.
|
||||
fn remove_inst(&mut self) -> ir::Inst {
|
||||
let inst = self.current_inst().expect("No instruction to remove");
|
||||
self.next_inst();
|
||||
self.layout_mut().remove_inst(inst);
|
||||
inst
|
||||
}
|
||||
|
||||
/// Remove the instruction under the cursor.
|
||||
///
|
||||
/// The cursor is left pointing at the position preceding the current instruction.
|
||||
///
|
||||
/// Return the instruction that was removed.
|
||||
fn remove_inst_and_step_back(&mut self) -> ir::Inst {
|
||||
let inst = self.current_inst().expect("No instruction to remove");
|
||||
self.prev_inst();
|
||||
self.layout_mut().remove_inst(inst);
|
||||
inst
|
||||
}
|
||||
|
||||
/// Insert an EBB at the current position and switch to it.
|
||||
///
|
||||
/// As far as possible, this method behaves as if the EBB header were an instruction inserted
|
||||
/// at the current position.
|
||||
///
|
||||
/// - If the cursor is pointing at an existing instruction, *the current EBB is split in two*
|
||||
/// and the current instruction becomes the first instruction in the inserted EBB.
|
||||
/// - If the cursor points at the bottom of an EBB, the new EBB is inserted after the current
|
||||
/// one, and moved to the bottom of the new EBB where instructions can be appended.
|
||||
/// - If the cursor points to the top of an EBB, the new EBB is inserted above the current one.
|
||||
/// - If the cursor is not pointing at anything, the new EBB is placed last in the layout.
|
||||
///
|
||||
/// This means that it is always valid to call this method, and it always leaves the cursor in
|
||||
/// a state that will insert instructions into the new EBB.
|
||||
fn insert_ebb(&mut self, new_ebb: ir::Ebb) {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
At(inst) => {
|
||||
self.layout_mut().split_ebb(new_ebb, inst);
|
||||
// All other cases move to `After(ebb)`, but in this case we'll stay `At(inst)`.
|
||||
return;
|
||||
}
|
||||
Nowhere => self.layout_mut().append_ebb(new_ebb),
|
||||
Before(ebb) => self.layout_mut().insert_ebb(new_ebb, ebb),
|
||||
After(ebb) => self.layout_mut().insert_ebb_after(new_ebb, ebb),
|
||||
}
|
||||
// For everything but `At(inst)` we end up appending to the new EBB.
|
||||
self.set_position(After(new_ebb));
|
||||
}
|
||||
}
|
||||
|
||||
/// Function cursor.
|
||||
///
|
||||
/// A `FuncCursor` holds a mutable reference to a whole `ir::Function` while keeping a position
|
||||
/// too. The function can be re-borrowed by accessing the public `cur.func` member.
|
||||
///
|
||||
/// This cursor is for use before legalization. The inserted instructions are not given an
|
||||
/// encoding.
|
||||
pub struct FuncCursor<'f> {
|
||||
pos: CursorPosition,
|
||||
srcloc: ir::SourceLoc,
|
||||
|
||||
/// The referenced function.
|
||||
pub func: &'f mut ir::Function,
|
||||
}
|
||||
|
||||
impl<'f> FuncCursor<'f> {
|
||||
/// Create a new `FuncCursor` pointing nowhere.
|
||||
pub fn new(func: &'f mut ir::Function) -> FuncCursor<'f> {
|
||||
FuncCursor {
|
||||
pos: CursorPosition::Nowhere,
|
||||
srcloc: Default::default(),
|
||||
func,
|
||||
}
|
||||
}
|
||||
|
||||
/// Use the source location of `inst` for future instructions.
|
||||
pub fn use_srcloc(&mut self, inst: ir::Inst) {
|
||||
self.srcloc = self.func.srclocs[inst];
|
||||
}
|
||||
|
||||
/// Create an instruction builder that inserts an instruction at the current position.
|
||||
pub fn ins(&mut self) -> ir::InsertBuilder<&mut FuncCursor<'f>> {
|
||||
ir::InsertBuilder::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'f> Cursor for FuncCursor<'f> {
|
||||
fn position(&self) -> CursorPosition {
|
||||
self.pos
|
||||
}
|
||||
|
||||
fn set_position(&mut self, pos: CursorPosition) {
|
||||
self.pos = pos
|
||||
}
|
||||
|
||||
fn srcloc(&self) -> ir::SourceLoc {
|
||||
self.srcloc
|
||||
}
|
||||
|
||||
fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
|
||||
self.srcloc = srcloc;
|
||||
}
|
||||
|
||||
fn layout(&self) -> &ir::Layout {
|
||||
&self.func.layout
|
||||
}
|
||||
|
||||
fn layout_mut(&mut self) -> &mut ir::Layout {
|
||||
&mut self.func.layout
|
||||
}
|
||||
}
|
||||
|
||||
impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> {
    fn data_flow_graph(&self) -> &ir::DataFlowGraph {
        &self.func.dfg
    }

    fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
        &mut self.func.dfg
    }

    fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph {
        self.insert_inst(inst);
        // Only record a source location when one has actually been set.
        if !self.srcloc.is_default() {
            self.func.srclocs[inst] = self.srcloc;
        }
        &mut self.func.dfg
    }
}
|
||||
|
||||
/// Encoding cursor.
|
||||
///
|
||||
/// An `EncCursor` can be used to insert instructions that are immediately assigned an encoding.
|
||||
/// The cursor holds a mutable reference to the whole function which can be re-borrowed from the
|
||||
/// public `pos.func` member.
|
||||
pub struct EncCursor<'f> {
|
||||
pos: CursorPosition,
|
||||
srcloc: ir::SourceLoc,
|
||||
built_inst: Option<ir::Inst>,
|
||||
|
||||
/// The referenced function.
|
||||
pub func: &'f mut ir::Function,
|
||||
|
||||
/// The target ISA that will be used to encode instructions.
|
||||
pub isa: &'f TargetIsa,
|
||||
}
|
||||
|
||||
impl<'f> EncCursor<'f> {
|
||||
/// Create a new `EncCursor` pointing nowhere.
|
||||
pub fn new(func: &'f mut ir::Function, isa: &'f TargetIsa) -> EncCursor<'f> {
|
||||
EncCursor {
|
||||
pos: CursorPosition::Nowhere,
|
||||
srcloc: Default::default(),
|
||||
built_inst: None,
|
||||
func,
|
||||
isa,
|
||||
}
|
||||
}
|
||||
|
||||
/// Use the source location of `inst` for future instructions.
|
||||
pub fn use_srcloc(&mut self, inst: ir::Inst) {
|
||||
self.srcloc = self.func.srclocs[inst];
|
||||
}
|
||||
|
||||
/// Create an instruction builder that will insert an encoded instruction at the current
|
||||
/// position.
|
||||
///
|
||||
/// The builder will panic if it is used to insert an instruction that can't be encoded for
|
||||
/// `self.isa`.
|
||||
pub fn ins(&mut self) -> ir::InsertBuilder<&mut EncCursor<'f>> {
|
||||
ir::InsertBuilder::new(self)
|
||||
}
|
||||
|
||||
/// Get the last built instruction.
|
||||
///
|
||||
/// This returns the last instruction that was built using the `ins()` method on this cursor.
|
||||
/// Panics if no instruction was built.
|
||||
pub fn built_inst(&self) -> ir::Inst {
|
||||
self.built_inst.expect("No instruction was inserted")
|
||||
}
|
||||
|
||||
/// Return an object that can display `inst`.
|
||||
///
|
||||
/// This is a convenience wrapper for the DFG equivalent.
|
||||
pub fn display_inst(&self, inst: ir::Inst) -> ir::dfg::DisplayInst {
|
||||
self.func.dfg.display_inst(inst, self.isa)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'f> Cursor for EncCursor<'f> {
|
||||
fn position(&self) -> CursorPosition {
|
||||
self.pos
|
||||
}
|
||||
|
||||
fn set_position(&mut self, pos: CursorPosition) {
|
||||
self.pos = pos
|
||||
}
|
||||
|
||||
fn srcloc(&self) -> ir::SourceLoc {
|
||||
self.srcloc
|
||||
}
|
||||
|
||||
fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
|
||||
self.srcloc = srcloc;
|
||||
}
|
||||
|
||||
fn layout(&self) -> &ir::Layout {
|
||||
&self.func.layout
|
||||
}
|
||||
|
||||
fn layout_mut(&mut self) -> &mut ir::Layout {
|
||||
&mut self.func.layout
|
||||
}
|
||||
}
|
||||
|
||||
impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
    fn data_flow_graph(&self) -> &ir::DataFlowGraph {
        &self.func.dfg
    }

    fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
        &mut self.func.dfg
    }

    fn insert_built_inst(
        self,
        inst: ir::Inst,
        ctrl_typevar: ir::Type,
    ) -> &'c mut ir::DataFlowGraph {
        // Insert the instruction and remember the reference.
        self.insert_inst(inst);
        self.built_inst = Some(inst);

        // Only record a source location when one has actually been set.
        if !self.srcloc.is_default() {
            self.func.srclocs[inst] = self.srcloc;
        }

        // Assign an encoding.
        // XXX Is there a way to describe this error to the user?
        #[cfg_attr(feature = "cargo-clippy", allow(match_wild_err_arm))]
        match self.isa.encode(&self.func, &self.func.dfg[inst], ctrl_typevar) {
            Ok(e) => self.func.encodings[inst] = e,
            Err(_) => panic!("can't encode {}", self.display_inst(inst)),
        }

        &mut self.func.dfg
    }
}
|
||||
146
lib/codegen/src/dbg.rs
Normal file
146
lib/codegen/src/dbg.rs
Normal file
@@ -0,0 +1,146 @@
|
||||
//! Debug tracing macros.
//!
//! This module defines the `dbg!` macro which works like `println!` except it writes to the
//! Cretonne tracing output file if enabled.
//!
//! Tracing can be enabled by setting the `CRETONNE_DBG` environment variable to something
//! other than `0`.
//!
//! The output will appear in files named `cretonne.dbg.*`, where the suffix is named after the
//! thread doing the logging.
|
||||
#[cfg(feature = "std")]
|
||||
use std::cell::RefCell;
|
||||
#[cfg(feature = "std")]
|
||||
use std::env;
|
||||
#[cfg(feature = "std")]
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
#[cfg(feature = "std")]
|
||||
use std::fs::File;
|
||||
#[cfg(feature = "std")]
|
||||
use std::io::{self, Write};
|
||||
#[cfg(feature = "std")]
|
||||
use std::sync::atomic;
|
||||
#[cfg(feature = "std")]
|
||||
use std::thread;
|
||||
|
||||
// Tri-state tracing flag: 0 = not yet initialized, positive = enabled, negative = disabled
// (see `initialize()` which stores 1 or -1, and `enabled()` which interprets the sign).
#[cfg(feature = "std")]
static STATE: atomic::AtomicIsize = atomic::ATOMIC_ISIZE_INIT;
|
||||
|
||||
/// Is debug tracing enabled?
///
/// Debug tracing can be enabled by setting the `CRETONNE_DBG` environment variable to something
/// other than `0`.
///
/// This inline function turns into a constant `false` when debug assertions are disabled.
#[cfg(feature = "std")]
#[inline]
pub fn enabled() -> bool {
    // In release builds (no debug assertions) tracing is compiled out entirely.
    if !cfg!(debug_assertions) {
        return false;
    }
    // `STATE` is lazily initialized from the environment on first query.
    let state = STATE.load(atomic::Ordering::Relaxed);
    if state == 0 {
        initialize()
    } else {
        state > 0
    }
}
|
||||
|
||||
/// Debug tracing is unavailable without the `std` feature, so this always returns `false`.
#[cfg(not(feature = "std"))]
#[inline]
pub fn enabled() -> bool {
    false
}
|
||||
|
||||
/// Initialize `STATE` from the `CRETONNE_DBG` environment variable.
///
/// Stores a positive value in `STATE` when tracing is enabled and a negative value otherwise,
/// then returns whether tracing is enabled.
#[cfg(feature = "std")]
fn initialize() -> bool {
    // Any value other than "0" (including an empty string) enables tracing.
    let enable = env::var_os("CRETONNE_DBG").map_or(false, |s| s != OsStr::new("0"));

    // Record the decision so later calls to `enabled()` skip the environment lookup.
    STATE.store(if enable { 1 } else { -1 }, atomic::Ordering::Relaxed);

    enable
}
|
||||
|
||||
// Per-thread buffered writer for the trace output; the file is opened lazily by `open_file()`
// the first time the current thread writes a trace line.
#[cfg(feature = "std")]
thread_local! {
    static WRITER : RefCell<io::BufWriter<File>> = RefCell::new(open_file());
}
|
||||
|
||||
/// Write a line with the given format arguments.
///
/// This is for use by the `dbg!` macro. The line goes to the current thread's trace file and
/// the buffer is flushed immediately so partial traces survive a crash.
#[cfg(feature = "std")]
pub fn writeln_with_format_args(args: fmt::Arguments) -> io::Result<()> {
    WRITER.with(|cell| {
        let mut writer = cell.borrow_mut();
        writeln!(writer, "{}", args).and_then(|_| writer.flush())
    })
}
|
||||
|
||||
/// Open the tracing file for the current thread.
///
/// The file is named `cretonne.dbg.<thread>`, where `<thread>` is the thread name (or its
/// debug-formatted thread ID when unnamed) with anything but alphanumerics, `-` and `_`
/// filtered out.
#[cfg(feature = "std")]
fn open_file() -> io::BufWriter<File> {
    let curthread = thread::current();
    // Fall back to the thread ID when the thread is unnamed.
    let raw_name = match curthread.name() {
        Some(name) => name.to_owned(),
        None => format!("{:?}", curthread.id()),
    };
    let mut path = "cretonne.dbg.".to_owned();
    for ch in raw_name.chars() {
        // Keep only filesystem-friendly characters.
        if ch.is_alphanumeric() || ch == '-' || ch == '_' {
            path.push(ch);
        }
    }
    let file = File::create(path).expect("Can't open tracing file");
    io::BufWriter::new(file)
}
|
||||
|
||||
/// Write a line to the debug trace file if tracing is enabled.
///
/// Arguments are the same as for `println!`.
///
/// Without the `std` feature, `enabled()` is always `false` and the write statement is
/// compiled out, so the macro expands to nothing observable.
#[macro_export]
macro_rules! dbg {
    ($($arg:tt)+) => {
        if $crate::dbg::enabled() {
            // Drop the error result so we don't get compiler errors for ignoring it.
            // What are you going to do, log the error?
            #[cfg(feature = "std")]
            $crate::dbg::writeln_with_format_args(format_args!($($arg)+)).ok();
        }
    }
}
|
||||
|
||||
/// Helper for printing lists: wraps a slice of displayable items so the whole slice can be
/// formatted as `[a, b, c]`.
pub struct DisplayList<'a, T: 'a + fmt::Display>(pub &'a [T]);
|
||||
|
||||
impl<'a, T> fmt::Display for DisplayList<'a, T>
|
||||
where
|
||||
T: 'a + fmt::Display,
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0.split_first() {
|
||||
None => write!(f, "[]"),
|
||||
Some((first, rest)) => {
|
||||
write!(f, "[{}", first)?;
|
||||
for x in rest {
|
||||
write!(f, ", {}", x)?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
68
lib/codegen/src/dce.rs
Normal file
68
lib/codegen/src/dce.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
//! A Dead-Code Elimination (DCE) pass.
|
||||
//!
|
||||
//! Dead code here means instructions that have no side effects and have no
|
||||
//! result values used by other instructions.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::EntityRef;
|
||||
use ir::instructions::InstructionData;
|
||||
use ir::{DataFlowGraph, Function, Inst, Opcode};
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// Test whether the given opcode is unsafe to even consider for DCE.
|
||||
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
|
||||
opcode.is_call() || opcode.is_branch() || opcode.is_terminator() ||
|
||||
opcode.is_return() || opcode.can_trap() || opcode.other_side_effects() ||
|
||||
opcode.can_store()
|
||||
}
|
||||
|
||||
/// Preserve instructions with used result values.
|
||||
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
|
||||
dfg.inst_results(inst).iter().any(|v| live[v.index()])
|
||||
}
|
||||
|
||||
/// Load instructions without the `notrap` flag are defined to trap when
|
||||
/// operating on inaccessible memory, so we can't DCE them even if the
|
||||
/// loaded value is unused.
|
||||
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
|
||||
if !opcode.can_load() {
|
||||
return false;
|
||||
}
|
||||
match *data {
|
||||
InstructionData::StackLoad { .. } => false,
|
||||
InstructionData::Load { flags, .. } => !flags.notrap(),
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform DCE on `func`.
///
/// Walks the EBBs in CFG post-order and each EBB bottom-up, so every use of a value is seen
/// before its definition. Instructions whose results are all dead and which have no side
/// effects are removed; the arguments of every kept instruction are marked live.
///
/// Requires a valid dominator tree (for its cached CFG post-order); the tree itself is not
/// modified by instruction removal.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
    let _tt = timing::dce();
    debug_assert!(domtree.is_valid());

    // Liveness bitmap indexed by `Value::index()`; all values start out dead.
    let mut live = Vec::with_capacity(func.dfg.num_values());
    live.resize(func.dfg.num_values(), false);

    for &ebb in domtree.cfg_postorder().iter() {
        // Scan the EBB backwards so uses are processed before definitions.
        let mut pos = FuncCursor::new(func).at_bottom(ebb);
        while let Some(inst) = pos.prev_inst() {
            {
                // Inner scope: the borrow of `pos.func.dfg` must end before `remove_inst`.
                let data = &pos.func.dfg[inst];
                let opcode = data.opcode();
                if trivially_unsafe_for_dce(opcode) ||
                    is_load_with_defined_trapping(opcode, &data) ||
                    any_inst_results_used(inst, &live, &pos.func.dfg)
                {
                    // The instruction must be kept: mark its (alias-resolved) arguments live.
                    for arg in pos.func.dfg.inst_args(inst) {
                        let v = pos.func.dfg.resolve_aliases(*arg);
                        live[v.index()] = true;
                    }
                    continue;
                }
            }
            // Dead and side-effect free: remove it.
            pos.remove_inst();
        }
    }
}
|
||||
547
lib/codegen/src/divconst_magic_numbers.rs
Normal file
547
lib/codegen/src/divconst_magic_numbers.rs
Normal file
@@ -0,0 +1,547 @@
|
||||
//! Compute "magic numbers" for division-by-constants transformations.
|
||||
//!
|
||||
//! Math helpers for division by (non-power-of-2) constants. This is based
|
||||
//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
|
||||
//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
|
||||
//! makes little difference, but the signed-vs-unsigned aspect has a large
|
||||
//! effect. Therefore everything is presented in the order U32 U64 S32 S64
|
||||
//! so as to emphasise the similarity of the U32 and U64 cases and the S32
|
||||
//! and S64 cases.
|
||||
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
// Structures to hold the "magic numbers" computed.
|
||||
|
||||
/// Magic numbers for unsigned 32-bit division by a constant.
#[derive(PartialEq, Debug)]
pub struct MU32 {
    pub mulBy: u32,   // the multiplier replacing the division
    pub doAdd: bool,  // whether an extra add step is needed after the multiply
    pub shiftBy: i32, // the final right-shift amount
}
|
||||
|
||||
/// Magic numbers for unsigned 64-bit division by a constant.
#[derive(PartialEq, Debug)]
pub struct MU64 {
    pub mulBy: u64,   // the multiplier replacing the division
    pub doAdd: bool,  // whether an extra add step is needed after the multiply
    pub shiftBy: i32, // the final right-shift amount
}
|
||||
|
||||
/// Magic numbers for signed 32-bit division by a constant.
/// Unlike the unsigned case, no add-indicator is needed.
#[derive(PartialEq, Debug)]
pub struct MS32 {
    pub mulBy: i32,   // the multiplier replacing the division
    pub shiftBy: i32, // the final right-shift amount
}
|
||||
|
||||
/// Magic numbers for signed 64-bit division by a constant.
/// Unlike the unsigned case, no add-indicator is needed.
#[derive(PartialEq, Debug)]
pub struct MS64 {
    pub mulBy: i64,   // the multiplier replacing the division
    pub shiftBy: i32, // the final right-shift amount
}
|
||||
|
||||
// The actual "magic number" generators follow.
|
||||
|
||||
/// Compute the magic numbers for unsigned 32-bit division by the constant `d`.
///
/// Follows the algorithm from "Hacker's Delight" (see module docs). The mix of wrapping and
/// plain arithmetic below is deliberate: only the operations that can legitimately overflow
/// use the `wrapping_*` forms, so a debug-build overflow panic would indicate a real bug.
pub fn magicU32(d: u32) -> MU32 {
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.

    let mut do_add: bool = false;
    let mut p: i32 = 31;
    // Largest multiple-of-d-minus-1 value, used to detect when the quotient is exact enough.
    let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
    let mut q1: u32 = 0x80000000u32 / nc;
    let mut r1: u32 = 0x80000000u32 - q1 * nc;
    let mut q2: u32 = 0x7FFFFFFFu32 / d;
    let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
    loop {
        p = p + 1;
        // Double (q1, r1) as a quotient/remainder pair with respect to nc.
        if r1 >= nc - r1 {
            q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
            r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
        } else {
            q1 = 2 * q1;
            r1 = 2 * r1;
        }
        // Double (q2, r2) with respect to d, noting when q2 overflows into the add-case.
        if r2 + 1 >= d - r2 {
            if q2 >= 0x7FFFFFFFu32 {
                do_add = true;
            }
            q2 = 2 * q2 + 1;
            r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
        } else {
            if q2 >= 0x80000000u32 {
                do_add = true;
            }
            q2 = u32::wrapping_mul(2, q2);
            r2 = 2 * r2 + 1;
        }
        let delta: u32 = d - 1 - r2;
        // Stop once the approximation is precise enough (or the shift budget is exhausted).
        if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
            break;
        }
    }

    MU32 {
        mulBy: q2 + 1,
        doAdd: do_add,
        shiftBy: p - 32,
    }
}
|
||||
|
||||
/// Compute the magic numbers for unsigned 64-bit division by the constant `d`.
///
/// Identical in structure to `magicU32`, widened to 64 bits; see that function and the module
/// docs for the algorithm background.
pub fn magicU64(d: u64) -> MU64 {
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.

    let mut do_add: bool = false;
    let mut p: i32 = 63;
    // Largest multiple-of-d-minus-1 value, used to detect when the quotient is exact enough.
    let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
    let mut q1: u64 = 0x8000000000000000u64 / nc;
    let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
    let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
    let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
    loop {
        p = p + 1;
        // Double (q1, r1) as a quotient/remainder pair with respect to nc.
        if r1 >= nc - r1 {
            q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
            r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
        } else {
            q1 = 2 * q1;
            r1 = 2 * r1;
        }
        // Double (q2, r2) with respect to d, noting when q2 overflows into the add-case.
        if r2 + 1 >= d - r2 {
            if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
                do_add = true;
            }
            q2 = 2 * q2 + 1;
            r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
        } else {
            if q2 >= 0x8000000000000000u64 {
                do_add = true;
            }
            q2 = u64::wrapping_mul(2, q2);
            r2 = 2 * r2 + 1;
        }
        let delta: u64 = d - 1 - r2;
        // Stop once the approximation is precise enough (or the shift budget is exhausted).
        if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
            break;
        }
    }

    MU64 {
        mulBy: q2 + 1,
        doAdd: do_add,
        shiftBy: p - 64,
    }
}
|
||||
|
||||
/// Compute the magic numbers for signed 32-bit division by the constant `d`.
///
/// The computation is done in unsigned arithmetic on `|d|`; the multiplier's sign is fixed up
/// at the end. See the module docs for the algorithm background.
pub fn magicS32(d: i32) -> MS32 {
    // d in {-1, 0, 1} would generate degenerate or out-of-range results.
    debug_assert_ne!(d, -1);
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1);
    let two31: u32 = 0x80000000u32;
    let mut p: i32 = 31;
    let ad: u32 = i32::wrapping_abs(d) as u32;
    // t = 2^31 + (1 if d < 0 else 0); anc is the largest multiple-of-ad value below t.
    let t: u32 = two31 + ((d as u32) >> 31);
    let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
    let mut q1: u32 = two31 / anc;
    let mut r1: u32 = two31 - q1 * anc;
    let mut q2: u32 = two31 / ad;
    let mut r2: u32 = two31 - q2 * ad;
    loop {
        p = p + 1;
        // Double (q1, r1) as a quotient/remainder pair with respect to anc.
        q1 = 2 * q1;
        r1 = 2 * r1;
        if r1 >= anc {
            q1 = q1 + 1;
            r1 = r1 - anc;
        }
        // Double (q2, r2) with respect to ad.
        q2 = 2 * q2;
        r2 = 2 * r2;
        if r2 >= ad {
            q2 = q2 + 1;
            r2 = r2 - ad;
        }
        let delta: u32 = ad - r2;
        // Stop once the approximation is precise enough.
        if !(q1 < delta || (q1 == delta && r1 == 0)) {
            break;
        }
    }

    MS32 {
        // Negative divisors get a negated multiplier.
        mulBy: (if d < 0 {
            u32::wrapping_neg(q2 + 1)
        } else {
            q2 + 1
        }) as i32,
        shiftBy: p - 32,
    }
}
|
||||
|
||||
/// Compute the magic numbers for signed 64-bit division by the constant `d`.
///
/// Identical in structure to `magicS32`, widened to 64 bits; see that function and the module
/// docs for the algorithm background.
pub fn magicS64(d: i64) -> MS64 {
    // d in {-1, 0, 1} would generate degenerate or out-of-range results.
    debug_assert_ne!(d, -1);
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1);
    let two63: u64 = 0x8000000000000000u64;
    let mut p: i32 = 63;
    let ad: u64 = i64::wrapping_abs(d) as u64;
    // t = 2^63 + (1 if d < 0 else 0); anc is the largest multiple-of-ad value below t.
    let t: u64 = two63 + ((d as u64) >> 63);
    let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
    let mut q1: u64 = two63 / anc;
    let mut r1: u64 = two63 - q1 * anc;
    let mut q2: u64 = two63 / ad;
    let mut r2: u64 = two63 - q2 * ad;
    loop {
        p = p + 1;
        // Double (q1, r1) as a quotient/remainder pair with respect to anc.
        q1 = 2 * q1;
        r1 = 2 * r1;
        if r1 >= anc {
            q1 = q1 + 1;
            r1 = r1 - anc;
        }
        // Double (q2, r2) with respect to ad.
        q2 = 2 * q2;
        r2 = 2 * r2;
        if r2 >= ad {
            q2 = q2 + 1;
            r2 = r2 - ad;
        }
        let delta: u64 = ad - r2;
        // Stop once the approximation is precise enough.
        if !(q1 < delta || (q1 == delta && r1 == 0)) {
            break;
        }
    }

    MS64 {
        // Negative divisors get a negated multiplier.
        mulBy: (if d < 0 {
            u64::wrapping_neg(q2 + 1)
        } else {
            q2 + 1
        }) as i64,
        shiftBy: p - 64,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{MS32, MS64, MU32, MU64};
    use super::{magicS32, magicS64, magicU32, magicU64};

    // Shorthand constructors for the expected magic-number values below.
    fn mkMU32(mulBy: u32, doAdd: bool, shiftBy: i32) -> MU32 {
        MU32 {
            mulBy,
            doAdd,
            shiftBy,
        }
    }

    fn mkMU64(mulBy: u64, doAdd: bool, shiftBy: i32) -> MU64 {
        MU64 {
            mulBy,
            doAdd,
            shiftBy,
        }
    }

    fn mkMS32(mulBy: i32, shiftBy: i32) -> MS32 {
        MS32 { mulBy, shiftBy }
    }

    fn mkMS64(mulBy: i64, shiftBy: i32) -> MS64 {
        MS64 { mulBy, shiftBy }
    }

    // Spot-check expected magic values for small, boundary, and arbitrary divisors.
    #[test]
    fn test_magicU32() {
        assert_eq!(magicU32(2u32), mkMU32(0x80000000u32, false, 0));
        assert_eq!(magicU32(3u32), mkMU32(0xaaaaaaabu32, false, 1));
        assert_eq!(magicU32(4u32), mkMU32(0x40000000u32, false, 0));
        assert_eq!(magicU32(5u32), mkMU32(0xcccccccdu32, false, 2));
        assert_eq!(magicU32(6u32), mkMU32(0xaaaaaaabu32, false, 2));
        assert_eq!(magicU32(7u32), mkMU32(0x24924925u32, true, 3));
        assert_eq!(magicU32(9u32), mkMU32(0x38e38e39u32, false, 1));
        assert_eq!(magicU32(10u32), mkMU32(0xcccccccdu32, false, 3));
        assert_eq!(magicU32(11u32), mkMU32(0xba2e8ba3u32, false, 3));
        assert_eq!(magicU32(12u32), mkMU32(0xaaaaaaabu32, false, 3));
        assert_eq!(magicU32(25u32), mkMU32(0x51eb851fu32, false, 3));
        assert_eq!(magicU32(125u32), mkMU32(0x10624dd3u32, false, 3));
        assert_eq!(magicU32(625u32), mkMU32(0xd1b71759u32, false, 9));
        assert_eq!(magicU32(1337u32), mkMU32(0x88233b2bu32, true, 11));
        assert_eq!(magicU32(65535u32), mkMU32(0x80008001u32, false, 15));
        assert_eq!(magicU32(65536u32), mkMU32(0x00010000u32, false, 0));
        assert_eq!(magicU32(65537u32), mkMU32(0xffff0001u32, false, 16));
        assert_eq!(magicU32(31415927u32), mkMU32(0x445b4553u32, false, 23));
        assert_eq!(magicU32(0xdeadbeefu32), mkMU32(0x93275ab3u32, false, 31));
        assert_eq!(magicU32(0xfffffffdu32), mkMU32(0x40000001u32, false, 30));
        assert_eq!(magicU32(0xfffffffeu32), mkMU32(0x00000003u32, true, 32));
        assert_eq!(magicU32(0xffffffffu32), mkMU32(0x80000001u32, false, 31));
    }
    #[test]
    fn test_magicU64() {
        assert_eq!(magicU64(2u64), mkMU64(0x8000000000000000u64, false, 0));
        assert_eq!(magicU64(3u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 1));
        assert_eq!(magicU64(4u64), mkMU64(0x4000000000000000u64, false, 0));
        assert_eq!(magicU64(5u64), mkMU64(0xcccccccccccccccdu64, false, 2));
        assert_eq!(magicU64(6u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 2));
        assert_eq!(magicU64(7u64), mkMU64(0x2492492492492493u64, true, 3));
        assert_eq!(magicU64(9u64), mkMU64(0xe38e38e38e38e38fu64, false, 3));
        assert_eq!(magicU64(10u64), mkMU64(0xcccccccccccccccdu64, false, 3));
        assert_eq!(magicU64(11u64), mkMU64(0x2e8ba2e8ba2e8ba3u64, false, 1));
        assert_eq!(magicU64(12u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 3));
        assert_eq!(magicU64(25u64), mkMU64(0x47ae147ae147ae15u64, true, 5));
        assert_eq!(magicU64(125u64), mkMU64(0x0624dd2f1a9fbe77u64, true, 7));
        assert_eq!(magicU64(625u64), mkMU64(0x346dc5d63886594bu64, false, 7));
        assert_eq!(magicU64(1337u64), mkMU64(0xc4119d952866a139u64, false, 10));
        assert_eq!(
            magicU64(31415927u64),
            mkMU64(0x116d154b9c3d2f85u64, true, 25)
        );
        assert_eq!(
            magicU64(0x00000000deadbeefu64),
            mkMU64(0x93275ab2dfc9094bu64, false, 31)
        );
        assert_eq!(
            magicU64(0x00000000fffffffdu64),
            mkMU64(0x8000000180000005u64, false, 31)
        );
        assert_eq!(
            magicU64(0x00000000fffffffeu64),
            mkMU64(0x0000000200000005u64, true, 32)
        );
        assert_eq!(
            magicU64(0x00000000ffffffffu64),
            mkMU64(0x8000000080000001u64, false, 31)
        );
        assert_eq!(
            magicU64(0x0000000100000000u64),
            mkMU64(0x0000000100000000u64, false, 0)
        );
        assert_eq!(
            magicU64(0x0000000100000001u64),
            mkMU64(0xffffffff00000001u64, false, 32)
        );
        assert_eq!(
            magicU64(0x0ddc0ffeebadf00du64),
            mkMU64(0x2788e9d394b77da1u64, true, 60)
        );
        assert_eq!(
            magicU64(0xfffffffffffffffdu64),
            mkMU64(0x4000000000000001u64, false, 62)
        );
        assert_eq!(
            magicU64(0xfffffffffffffffeu64),
            mkMU64(0x0000000000000003u64, true, 64)
        );
        assert_eq!(
            magicU64(0xffffffffffffffffu64),
            mkMU64(0x8000000000000001u64, false, 63)
        );
    }
    #[test]
    fn test_magicS32() {
        assert_eq!(magicS32(-0x80000000i32), mkMS32(0x7fffffffu32 as i32, 30));
        assert_eq!(magicS32(-0x7FFFFFFFi32), mkMS32(0xbfffffffu32 as i32, 29));
        assert_eq!(magicS32(-0x7FFFFFFEi32), mkMS32(0x7ffffffdu32 as i32, 30));
        assert_eq!(magicS32(-31415927i32), mkMS32(0xbba4baadu32 as i32, 23));
        assert_eq!(magicS32(-1337i32), mkMS32(0x9df73135u32 as i32, 9));
        assert_eq!(magicS32(-256i32), mkMS32(0x7fffffffu32 as i32, 7));
        assert_eq!(magicS32(-5i32), mkMS32(0x99999999u32 as i32, 1));
        assert_eq!(magicS32(-3i32), mkMS32(0x55555555u32 as i32, 1));
        assert_eq!(magicS32(-2i32), mkMS32(0x7fffffffu32 as i32, 0));
        assert_eq!(magicS32(2i32), mkMS32(0x80000001u32 as i32, 0));
        assert_eq!(magicS32(3i32), mkMS32(0x55555556u32 as i32, 0));
        assert_eq!(magicS32(4i32), mkMS32(0x80000001u32 as i32, 1));
        assert_eq!(magicS32(5i32), mkMS32(0x66666667u32 as i32, 1));
        assert_eq!(magicS32(6i32), mkMS32(0x2aaaaaabu32 as i32, 0));
        assert_eq!(magicS32(7i32), mkMS32(0x92492493u32 as i32, 2));
        assert_eq!(magicS32(9i32), mkMS32(0x38e38e39u32 as i32, 1));
        assert_eq!(magicS32(10i32), mkMS32(0x66666667u32 as i32, 2));
        assert_eq!(magicS32(11i32), mkMS32(0x2e8ba2e9u32 as i32, 1));
        assert_eq!(magicS32(12i32), mkMS32(0x2aaaaaabu32 as i32, 1));
        assert_eq!(magicS32(25i32), mkMS32(0x51eb851fu32 as i32, 3));
        assert_eq!(magicS32(125i32), mkMS32(0x10624dd3u32 as i32, 3));
        assert_eq!(magicS32(625i32), mkMS32(0x68db8badu32 as i32, 8));
        assert_eq!(magicS32(1337i32), mkMS32(0x6208cecbu32 as i32, 9));
        assert_eq!(magicS32(31415927i32), mkMS32(0x445b4553u32 as i32, 23));
        assert_eq!(magicS32(0x7ffffffei32), mkMS32(0x80000003u32 as i32, 30));
        assert_eq!(magicS32(0x7fffffffi32), mkMS32(0x40000001u32 as i32, 29));
    }
    #[test]
    fn test_magicS64() {
        assert_eq!(
            magicS64(-0x8000000000000000i64),
            mkMS64(0x7fffffffffffffffu64 as i64, 62)
        );
        assert_eq!(
            magicS64(-0x7FFFFFFFFFFFFFFFi64),
            mkMS64(0xbfffffffffffffffu64 as i64, 61)
        );
        assert_eq!(
            magicS64(-0x7FFFFFFFFFFFFFFEi64),
            mkMS64(0x7ffffffffffffffdu64 as i64, 62)
        );
        assert_eq!(
            magicS64(-0x0ddC0ffeeBadF00di64),
            mkMS64(0x6c3b8b1635a4412fu64 as i64, 59)
        );
        assert_eq!(
            magicS64(-0x100000001i64),
            mkMS64(0x800000007fffffffu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0x100000000i64),
            mkMS64(0x7fffffffffffffffu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0xFFFFFFFFi64),
            mkMS64(0x7fffffff7fffffffu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0xFFFFFFFEi64),
            mkMS64(0x7ffffffefffffffdu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0xFFFFFFFDi64),
            mkMS64(0x7ffffffe7ffffffbu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0xDeadBeefi64),
            mkMS64(0x6cd8a54d2036f6b5u64 as i64, 31)
        );
        assert_eq!(
            magicS64(-31415927i64),
            mkMS64(0x7749755a31e1683du64 as i64, 24)
        );
        assert_eq!(magicS64(-1337i64), mkMS64(0x9df731356bccaf63u64 as i64, 9));
        assert_eq!(magicS64(-256i64), mkMS64(0x7fffffffffffffffu64 as i64, 7));
        assert_eq!(magicS64(-5i64), mkMS64(0x9999999999999999u64 as i64, 1));
        assert_eq!(magicS64(-3i64), mkMS64(0x5555555555555555u64 as i64, 1));
        assert_eq!(magicS64(-2i64), mkMS64(0x7fffffffffffffffu64 as i64, 0));
        assert_eq!(magicS64(2i64), mkMS64(0x8000000000000001u64 as i64, 0));
        assert_eq!(magicS64(3i64), mkMS64(0x5555555555555556u64 as i64, 0));
        assert_eq!(magicS64(4i64), mkMS64(0x8000000000000001u64 as i64, 1));
        assert_eq!(magicS64(5i64), mkMS64(0x6666666666666667u64 as i64, 1));
        assert_eq!(magicS64(6i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
        assert_eq!(magicS64(7i64), mkMS64(0x4924924924924925u64 as i64, 1));
        assert_eq!(magicS64(9i64), mkMS64(0x1c71c71c71c71c72u64 as i64, 0));
        assert_eq!(magicS64(10i64), mkMS64(0x6666666666666667u64 as i64, 2));
        assert_eq!(magicS64(11i64), mkMS64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
        assert_eq!(magicS64(12i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
        assert_eq!(magicS64(25i64), mkMS64(0xa3d70a3d70a3d70bu64 as i64, 4));
        assert_eq!(magicS64(125i64), mkMS64(0x20c49ba5e353f7cfu64 as i64, 4));
        assert_eq!(magicS64(625i64), mkMS64(0x346dc5d63886594bu64 as i64, 7));
        assert_eq!(magicS64(1337i64), mkMS64(0x6208ceca9433509du64 as i64, 9));
        assert_eq!(
            magicS64(31415927i64),
            mkMS64(0x88b68aa5ce1e97c3u64 as i64, 24)
        );
        assert_eq!(
            magicS64(0x00000000deadbeefi64),
            mkMS64(0x93275ab2dfc9094bu64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x00000000fffffffdi64),
            mkMS64(0x8000000180000005u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x00000000fffffffei64),
            mkMS64(0x8000000100000003u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x00000000ffffffffi64),
            mkMS64(0x8000000080000001u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x0000000100000000i64),
            mkMS64(0x8000000000000001u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x0000000100000001i64),
            mkMS64(0x7fffffff80000001u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x0ddc0ffeebadf00di64),
            mkMS64(0x93c474e9ca5bbed1u64 as i64, 59)
        );
        assert_eq!(
            magicS64(0x7ffffffffffffffdi64),
            mkMS64(0x2000000000000001u64 as i64, 60)
        );
        assert_eq!(
            magicS64(0x7ffffffffffffffei64),
            mkMS64(0x8000000000000003u64 as i64, 62)
        );
        assert_eq!(
            magicS64(0x7fffffffffffffffi64),
            mkMS64(0x4000000000000001u64 as i64, 61)
        );
    }
    #[test]
    fn test_magic_generators_dont_panic() {
        // The point of this is to check that the magic number generators
        // don't panic with integer wraparounds, especially at boundary
        // cases for their arguments. The actual results are thrown away.
        let mut total: u64 = 0;
        // Testing UP magicU32
        for x in 2..(200 * 1000u32) {
            let m = magicU32(x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
            total = total - (if m.doAdd { 123 } else { 456 });
        }
        assert_eq!(total, 1747815691);
        // Testing DOWN magicU32
        for x in 0..(200 * 1000u32) {
            let m = magicU32(0xFFFF_FFFFu32 - x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
            total = total - (if m.doAdd { 123 } else { 456 });
        }
        assert_eq!(total, 2210292772);

        // Testing UP magicU64
        for x in 2..(200 * 1000u64) {
            let m = magicU64(x);
            total = total ^ m.mulBy;
            total = total + (m.shiftBy as u64);
            total = total - (if m.doAdd { 123 } else { 456 });
        }
        assert_eq!(total, 7430004084791260605);
        // Testing DOWN magicU64
        for x in 0..(200 * 1000u64) {
            let m = magicU64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
            total = total ^ m.mulBy;
            total = total + (m.shiftBy as u64);
            total = total - (if m.doAdd { 123 } else { 456 });
        }
        assert_eq!(total, 7547519887519825919);

        // Testing UP magicS32
        for x in 0..(200 * 1000i32) {
            let m = magicS32(-0x8000_0000i32 + x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
        }
        assert_eq!(total, 10899224186731671235);
        // Testing DOWN magicS32
        for x in 0..(200 * 1000i32) {
            let m = magicS32(0x7FFF_FFFFi32 - x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
        }
        assert_eq!(total, 7547519887517897369);

        // Testing UP magicS64
        for x in 0..(200 * 1000i64) {
            let m = magicS64(-0x8000_0000_0000_0000i64 + x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
        }
        assert_eq!(total, 8029756891368555163);
        // Testing DOWN magicS64
        for x in 0..(200 * 1000i64) {
            let m = magicS64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
        }
        // Force `total` -- and hence, the entire computation -- to
        // be used, so that rustc can't optimise it out.
        assert_eq!(total, 7547519887532559585u64);
    }
}
|
||||
935
lib/codegen/src/dominator_tree.rs
Normal file
935
lib/codegen/src/dominator_tree.rs
Normal file
@@ -0,0 +1,935 @@
|
||||
//! A Dominator Tree represented as mappings of Ebbs to their immediate dominator.
|
||||
|
||||
use entity::EntityMap;
|
||||
use flowgraph::{BasicBlock, ControlFlowGraph};
|
||||
use ir::instructions::BranchInfo;
|
||||
use ir::{Ebb, ExpandedProgramPoint, Function, Inst, Layout, ProgramOrder, Value};
|
||||
use packed_option::PackedOption;
|
||||
use std::cmp;
|
||||
use std::cmp::Ordering;
|
||||
use std::mem;
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// RPO numbers are not first assigned in a contiguous way but as multiples of STRIDE, to leave
/// room for modifications of the dominator tree.
const STRIDE: u32 = 4;

/// Special RPO numbers used during `compute_postorder`.
/// NOTE(review): `compute_postorder` is defined further down in this file; these appear to be
/// temporary traversal markers distinct from real RPO numbers (which are multiples of STRIDE).
const DONE: u32 = 1;
const SEEN: u32 = 2;
||||
|
||||
/// Dominator tree node. We keep one of these per EBB.
///
/// `Default` yields `rpo_number == 0` and `idom == None`, i.e. an unreachable node.
#[derive(Clone, Default)]
struct DomNode {
    /// Number of this node in a reverse post-order traversal of the CFG, starting from 1.
    /// This number is monotonic in the reverse postorder but not contiguous, since we leave
    /// holes for later localized modifications of the dominator tree.
    /// Unreachable nodes get number 0, all others are positive.
    rpo_number: u32,

    /// The immediate dominator of this EBB, represented as the branch or jump instruction at the
    /// end of the dominating basic block.
    ///
    /// This is `None` for unreachable blocks and the entry block which doesn't have an immediate
    /// dominator.
    idom: PackedOption<Inst>,
}
|
||||
|
||||
/// The dominator tree for a single function.
pub struct DominatorTree {
    /// Per-EBB dominator information (RPO number and immediate dominator).
    nodes: EntityMap<Ebb, DomNode>,

    /// CFG post-order of all reachable EBBs.
    postorder: Vec<Ebb>,

    /// Scratch memory used by `compute_postorder()`.
    stack: Vec<Ebb>,

    /// Whether the cached tree/post-order reflect the current CFG; checked by query methods.
    valid: bool,
}
|
||||
|
||||
/// Methods for querying the dominator tree.
|
||||
impl DominatorTree {
|
||||
/// Is `ebb` reachable from the entry block?
|
||||
pub fn is_reachable(&self, ebb: Ebb) -> bool {
|
||||
self.nodes[ebb].rpo_number != 0
|
||||
}
|
||||
|
||||
/// Get the CFG post-order of EBBs that was used to compute the dominator tree.
|
||||
///
|
||||
/// Note that this post-order is not updated automatically when the CFG is modified. It is
|
||||
/// computed from scratch and cached by `compute()`.
|
||||
pub fn cfg_postorder(&self) -> &[Ebb] {
|
||||
debug_assert!(self.is_valid());
|
||||
&self.postorder
|
||||
}
|
||||
|
||||
/// Returns the immediate dominator of `ebb`.
|
||||
///
|
||||
/// The immediate dominator of an extended basic block is a basic block which we represent by
|
||||
/// the branch or jump instruction at the end of the basic block. This does not have to be the
|
||||
/// terminator of its EBB.
|
||||
///
|
||||
/// A branch or jump is said to *dominate* `ebb` if all control flow paths from the function
|
||||
/// entry to `ebb` must go through the branch.
|
||||
///
|
||||
/// The *immediate dominator* is the dominator that is closest to `ebb`. All other dominators
|
||||
/// also dominate the immediate dominator.
|
||||
///
|
||||
/// This returns `None` if `ebb` is not reachable from the entry EBB, or if it is the entry EBB
|
||||
/// which has no dominators.
|
||||
pub fn idom(&self, ebb: Ebb) -> Option<Inst> {
|
||||
self.nodes[ebb].idom.into()
|
||||
}
|
||||
|
||||
/// Compare two EBBs relative to the reverse post-order.
|
||||
fn rpo_cmp_ebb(&self, a: Ebb, b: Ebb) -> Ordering {
|
||||
self.nodes[a].rpo_number.cmp(&self.nodes[b].rpo_number)
|
||||
}
|
||||
|
||||
/// Compare two program points relative to a reverse post-order traversal of the control-flow
|
||||
/// graph.
|
||||
///
|
||||
/// Return `Ordering::Less` if `a` comes before `b` in the RPO.
|
||||
///
|
||||
/// If `a` and `b` belong to the same EBB, compare their relative position in the EBB.
|
||||
pub fn rpo_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
|
||||
where
|
||||
A: Into<ExpandedProgramPoint>,
|
||||
B: Into<ExpandedProgramPoint>,
|
||||
{
|
||||
let a = a.into();
|
||||
let b = b.into();
|
||||
self.rpo_cmp_ebb(layout.pp_ebb(a), layout.pp_ebb(b)).then(
|
||||
layout.cmp(a, b),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns `true` if `a` dominates `b`.
|
||||
///
|
||||
/// This means that every control-flow path from the function entry to `b` must go through `a`.
|
||||
///
|
||||
/// Dominance is ill defined for unreachable blocks. This function can always determine
|
||||
/// dominance for instructions in the same EBB, but otherwise returns `false` if either block
|
||||
/// is unreachable.
|
||||
///
|
||||
/// An instruction is considered to dominate itself.
|
||||
pub fn dominates<A, B>(&self, a: A, b: B, layout: &Layout) -> bool
|
||||
where
|
||||
A: Into<ExpandedProgramPoint>,
|
||||
B: Into<ExpandedProgramPoint>,
|
||||
{
|
||||
let a = a.into();
|
||||
let b = b.into();
|
||||
match a {
|
||||
ExpandedProgramPoint::Ebb(ebb_a) => {
|
||||
a == b || self.last_dominator(ebb_a, b, layout).is_some()
|
||||
}
|
||||
ExpandedProgramPoint::Inst(inst_a) => {
|
||||
let ebb_a = layout.inst_ebb(inst_a).expect("Instruction not in layout.");
|
||||
match self.last_dominator(ebb_a, b, layout) {
|
||||
Some(last) => layout.cmp(inst_a, last) != Ordering::Greater,
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the last instruction in `a` that dominates `b`.
/// If no instructions in `a` dominate `b`, return `None`.
pub fn last_dominator<B>(&self, a: Ebb, b: B, layout: &Layout) -> Option<Inst>
where
    B: Into<ExpandedProgramPoint>,
{
    // Start the walk at `b`'s enclosing EBB; remember the instruction-level position
    // (`None` when `b` is an EBB header).
    let (mut ebb_b, mut inst_b) = match b.into() {
        ExpandedProgramPoint::Ebb(ebb) => (ebb, None),
        ExpandedProgramPoint::Inst(inst) => (
            layout.inst_ebb(inst).expect(
                "Instruction not in layout.",
            ),
            Some(inst),
        ),
    };
    let rpo_a = self.nodes[a].rpo_number;

    // Run a finger up the dominator tree from b until we see a.
    // Do nothing if b is unreachable.
    // Note: any EBB strictly dominating `ebb_b` has a smaller RPO number, so we climb
    // while `a` still comes before the current block in the RPO.
    while rpo_a < self.nodes[ebb_b].rpo_number {
        let idom = match self.idom(ebb_b) {
            Some(idom) => idom,
            None => return None, // a is unreachable, so we climbed past the entry
        };
        ebb_b = layout.inst_ebb(idom).expect("Dominator got removed.");
        inst_b = Some(idom);
    }
    // If the climb stopped at `a` itself, `inst_b` is the last dominating instruction
    // inside `a`; otherwise `a` does not dominate `b`.
    if a == ebb_b { inst_b } else { None }
}
|
||||
|
||||
/// Compute the common dominator of two basic blocks.
///
/// Both basic blocks are assumed to be reachable.
pub fn common_dominator(
    &self,
    mut a: BasicBlock,
    mut b: BasicBlock,
    layout: &Layout,
) -> BasicBlock {
    // Classic two-finger walk: repeatedly move the block that is later in the RPO up to
    // its immediate dominator until both fingers land in the same EBB.
    loop {
        match self.rpo_cmp_ebb(a.0, b.0) {
            Ordering::Less => {
                // `a` comes before `b` in the RPO. Move `b` up.
                let idom = self.nodes[b.0].idom.expect("Unreachable basic block?");
                b = (
                    layout.inst_ebb(idom).expect("Dangling idom instruction"),
                    idom,
                );
            }
            Ordering::Greater => {
                // `b` comes before `a` in the RPO. Move `a` up.
                let idom = self.nodes[a.0].idom.expect("Unreachable basic block?");
                a = (
                    layout.inst_ebb(idom).expect("Dangling idom instruction"),
                    idom,
                );
            }
            Ordering::Equal => break,
        }
    }

    debug_assert_eq!(a.0, b.0, "Unreachable block passed to common_dominator?");

    // We're in the same EBB. The common dominator is the earlier instruction.
    if layout.cmp(a.1, b.1) == Ordering::Less {
        a
    } else {
        b
    }
}
|
||||
}
|
||||
|
||||
impl DominatorTree {
    /// Allocate a new blank dominator tree. Use `compute` to compute the dominator tree for a
    /// function.
    pub fn new() -> Self {
        Self {
            nodes: EntityMap::new(),
            postorder: Vec::new(),
            stack: Vec::new(),
            valid: false,
        }
    }

    /// Allocate and compute a dominator tree.
    pub fn with_function(func: &Function, cfg: &ControlFlowGraph) -> Self {
        let mut domtree = Self::new();
        domtree.compute(func, cfg);
        domtree
    }

    /// Reset and compute a CFG post-order and dominator tree.
    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
        let _tt = timing::domtree();
        debug_assert!(cfg.is_valid());
        self.compute_postorder(func);
        self.compute_domtree(func, cfg);
        self.valid = true;
    }

    /// Clear the data structures used to represent the dominator tree. This will leave the tree in
    /// a state where `is_valid()` returns false.
    pub fn clear(&mut self) {
        self.nodes.clear();
        self.postorder.clear();
        debug_assert!(self.stack.is_empty());
        self.valid = false;
    }

    /// Check if the dominator tree is in a valid state.
    ///
    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
    /// `compute()` method has been called since the last `clear()`. It does not check that the
    /// dominator tree is consistent with the CFG.
    pub fn is_valid(&self) -> bool {
        self.valid
    }

    /// Reset all internal data structures and compute a post-order of the control flow graph.
    ///
    /// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones.
    fn compute_postorder(&mut self, func: &Function) {
        self.clear();
        self.nodes.resize(func.dfg.num_ebbs());

        // This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
        // post-order of the EBBs that are reachable from the entry block. A DFT post-order is not
        // unique. The specific order we get is controlled by two factors:
        //
        // 1. The order each node's children are visited, and
        // 2. The method used for pruning graph edges to get a tree.
        //
        // There are two ways of viewing the CFG as a graph:
        //
        // 1. Each EBB is a node, with outgoing edges for all the branches in the EBB.
        // 2. Each basic block is a node, with outgoing edges for the single branch at the end of
        //    the BB. (An EBB is a linear sequence of basic blocks).
        //
        // The first graph is a contraction of the second one. We want to compute an EBB post-order
        // that is compatible both graph interpretations. That is, if you compute a BB post-order
        // and then remove those BBs that do not correspond to EBB headers, you get a post-order of
        // the EBB graph.
        //
        // Node child order:
        //
        //     In the BB graph, we always go down the fall-through path first and follow the branch
        //     destination second.
        //
        //     In the EBB graph, this is equivalent to visiting EBB successors in a bottom-up
        //     order, starting from the destination of the EBB's terminating jump, ending at the
        //     destination of the first branch in the EBB.
        //
        // Edge pruning:
        //
        //     In the BB graph, we keep an edge to an EBB the first time we visit the *source* side
        //     of the edge. Any subsequent edges to the same EBB are pruned.
        //
        //     The equivalent tree is reached in the EBB graph by keeping the first edge to an EBB
        //     in a top-down traversal of the successors. (And then visiting edges in a bottom-up
        //     order).
        //
        // This pruning method makes it possible to compute the DFT without storing lots of
        // information about the progress through an EBB.

        // During this algorithm only, use `rpo_number` to hold the following state:
        //
        // 0:    EBB has not yet been reached in the pre-order.
        // SEEN: EBB has been pushed on the stack but successors not yet pushed.
        // DONE: Successors pushed.

        match func.layout.entry_block() {
            Some(ebb) => {
                self.stack.push(ebb);
                self.nodes[ebb].rpo_number = SEEN;
            }
            None => return,
        }

        while let Some(ebb) = self.stack.pop() {
            match self.nodes[ebb].rpo_number {
                SEEN => {
                    // This is the first time we pop the EBB, so we need to scan its successors and
                    // then revisit it.
                    self.nodes[ebb].rpo_number = DONE;
                    self.stack.push(ebb);
                    self.push_successors(func, ebb);
                }
                DONE => {
                    // This is the second time we pop the EBB, so all successors have been
                    // processed.
                    self.postorder.push(ebb);
                }
                _ => unreachable!(),
            }
        }
    }

    /// Push `ebb` successors onto `self.stack`, filtering out those that have already been seen.
    ///
    /// The successors are pushed in program order which is important to get a split-invariant
    /// post-order. Split-invariant means that if an EBB is split in two, we get the same
    /// post-order except for the insertion of the new EBB header at the split point.
    fn push_successors(&mut self, func: &Function, ebb: Ebb) {
        for inst in func.layout.ebb_insts(ebb) {
            match func.dfg.analyze_branch(inst) {
                BranchInfo::SingleDest(succ, _) => {
                    // Only push a successor the first time it is seen (edge pruning).
                    if self.nodes[succ].rpo_number == 0 {
                        self.nodes[succ].rpo_number = SEEN;
                        self.stack.push(succ);
                    }
                }
                BranchInfo::Table(jt) => {
                    for (_, succ) in func.jump_tables[jt].entries() {
                        if self.nodes[succ].rpo_number == 0 {
                            self.nodes[succ].rpo_number = SEEN;
                            self.stack.push(succ);
                        }
                    }
                }
                BranchInfo::NotABranch => {}
            }
        }
    }

    /// Build a dominator tree from a control flow graph using Keith D. Cooper's
    /// "Simple, Fast Dominator Algorithm."
    fn compute_domtree(&mut self, func: &Function, cfg: &ControlFlowGraph) {
        // During this algorithm, `rpo_number` has the following values:
        //
        // 0: EBB is not reachable.
        // 1: EBB is reachable, but has not yet been visited during the first pass. This is set by
        //    `compute_postorder`.
        // 2+: EBB is reachable and has an assigned RPO number.

        // We'll be iterating over a reverse post-order of the CFG, skipping the entry block.
        let (entry_block, postorder) = match self.postorder.as_slice().split_last() {
            Some((&eb, rest)) => (eb, rest),
            None => return,
        };
        debug_assert_eq!(Some(entry_block), func.layout.entry_block());

        // Do a first pass where we assign RPO numbers to all reachable nodes.
        self.nodes[entry_block].rpo_number = 2 * STRIDE;
        for (rpo_idx, &ebb) in postorder.iter().rev().enumerate() {
            // Update the current node and give it an RPO number.
            // The entry block got 2, the rest start at 3 by multiples of STRIDE to leave
            // room for future dominator tree modifications.
            //
            // Since `compute_idom` will only look at nodes with an assigned RPO number, the
            // function will never see an uninitialized predecessor.
            //
            // Due to the nature of the post-order traversal, every node we visit will have at
            // least one predecessor that has previously been visited during this RPO.
            self.nodes[ebb] = DomNode {
                idom: self.compute_idom(ebb, cfg, &func.layout).into(),
                rpo_number: (rpo_idx as u32 + 3) * STRIDE,
            }
        }

        // Now that we have RPO numbers for everything and initial immediate dominator estimates,
        // iterate until convergence.
        //
        // If the function is free of irreducible control flow, this will exit after one iteration.
        let mut changed = true;
        while changed {
            changed = false;
            for &ebb in postorder.iter().rev() {
                let idom = self.compute_idom(ebb, cfg, &func.layout).into();
                if self.nodes[ebb].idom != idom {
                    self.nodes[ebb].idom = idom;
                    changed = true;
                }
            }
        }
    }

    // Compute the immediate dominator for `ebb` using the current `idom` states for the reachable
    // nodes.
    fn compute_idom(&self, ebb: Ebb, cfg: &ControlFlowGraph, layout: &Layout) -> Inst {
        // Get an iterator with just the reachable, already visited predecessors to `ebb`.
        // Note that during the first pass, `rpo_number` is 1 for reachable blocks that haven't
        // been visited yet, 0 for unreachable blocks.
        let mut reachable_preds = cfg.pred_iter(ebb).filter(|&(pred, _)| {
            self.nodes[pred].rpo_number > 1
        });

        // The RPO must visit at least one predecessor before this node.
        let mut idom = reachable_preds.next().expect(
            "EBB node must have one reachable predecessor",
        );

        // Fold the remaining predecessors into a single common dominator.
        for pred in reachable_preds {
            idom = self.common_dominator(idom, pred, layout);
        }

        idom.1
    }
}
|
||||
|
||||
impl DominatorTree {
|
||||
/// When splitting an `Ebb` using `Layout::split_ebb`, you can use this method to update
|
||||
/// the dominator tree locally rather than recomputing it.
|
||||
///
|
||||
/// `old_ebb` is the `Ebb` before splitting, and `new_ebb` is the `Ebb` which now contains
|
||||
/// the second half of `old_ebb`. `split_jump_inst` is the terminator jump instruction of
|
||||
/// `old_ebb` that points to `new_ebb`.
|
||||
pub fn recompute_split_ebb(&mut self, old_ebb: Ebb, new_ebb: Ebb, split_jump_inst: Inst) {
|
||||
if !self.is_reachable(old_ebb) {
|
||||
// old_ebb is unreachable, it stays so and new_ebb is unreachable too
|
||||
self.nodes[new_ebb] = Default::default();
|
||||
return;
|
||||
}
|
||||
// We use the RPO comparison on the postorder list so we invert the operands of the
|
||||
// comparison
|
||||
let old_ebb_postorder_index =
|
||||
self.postorder
|
||||
.as_slice()
|
||||
.binary_search_by(|probe| self.rpo_cmp_ebb(old_ebb, *probe))
|
||||
.expect("the old ebb is not declared to the dominator tree");
|
||||
let new_ebb_rpo = self.insert_after_rpo(old_ebb, old_ebb_postorder_index, new_ebb);
|
||||
self.nodes[new_ebb] = DomNode {
|
||||
rpo_number: new_ebb_rpo,
|
||||
idom: Some(split_jump_inst).into(),
|
||||
};
|
||||
}
|
||||
|
||||
// Insert new_ebb just after ebb in the RPO. This function checks
|
||||
// if there is a gap in rpo numbers; if yes it returns the number in the gap and if
|
||||
// not it renumbers.
|
||||
fn insert_after_rpo(&mut self, ebb: Ebb, ebb_postorder_index: usize, new_ebb: Ebb) -> u32 {
|
||||
let ebb_rpo_number = self.nodes[ebb].rpo_number;
|
||||
let inserted_rpo_number = ebb_rpo_number + 1;
|
||||
// If there is no gaps in RPo numbers to insert this new number, we iterate
|
||||
// forward in RPO numbers and backwards in the postorder list of EBBs, renumbering the Ebbs
|
||||
// until we find a gap
|
||||
for (¤t_ebb, current_rpo) in
|
||||
self.postorder[0..ebb_postorder_index].iter().rev().zip(
|
||||
inserted_rpo_number +
|
||||
1..,
|
||||
)
|
||||
{
|
||||
if self.nodes[current_ebb].rpo_number < current_rpo {
|
||||
// There is no gap, we renumber
|
||||
self.nodes[current_ebb].rpo_number = current_rpo;
|
||||
} else {
|
||||
// There is a gap, we stop the renumbering and exit
|
||||
break;
|
||||
}
|
||||
}
|
||||
// TODO: insert in constant time?
|
||||
self.postorder.insert(ebb_postorder_index, new_ebb);
|
||||
inserted_rpo_number
|
||||
}
|
||||
}
|
||||
|
||||
/// Optional pre-order information that can be computed for a dominator tree.
///
/// This data structure is computed from a `DominatorTree` and provides:
///
/// - A forward traversable dominator tree through the `children()` iterator.
/// - An ordering of EBBs according to a dominator tree pre-order.
/// - Constant time dominance checks at the EBB granularity.
///
/// The information in this auxiliary data structure is not easy to update when the control flow
/// graph changes, which is why it is kept separate.
pub struct DominatorTreePreorder {
    // Per-EBB pre-order numbering and child/sibling links (see `ExtraNode`).
    nodes: EntityMap<Ebb, ExtraNode>,

    // Scratch memory used by `compute_postorder()`.
    stack: Vec<Ebb>,
}
|
||||
|
||||
/// Per-EBB bookkeeping for `DominatorTreePreorder`: a left-child/right-sibling encoding of the
/// dominator tree plus pre-order interval numbers used for O(1) dominance checks.
#[derive(Default, Clone)]
struct ExtraNode {
    /// First child node in the domtree.
    child: PackedOption<Ebb>,

    /// Next sibling node in the domtree. This linked list is ordered according to the CFG RPO.
    sibling: PackedOption<Ebb>,

    /// Sequence number for this node in a pre-order traversal of the dominator tree.
    /// Unreachable blocks have number 0, the entry block is 1.
    pre_number: u32,

    /// Maximum `pre_number` for the sub-tree of the dominator tree that is rooted at this node.
    /// This is always >= `pre_number`.
    pre_max: u32,
}
|
||||
|
||||
/// Creating and computing the dominator tree pre-order.
impl DominatorTreePreorder {
    /// Create a new blank `DominatorTreePreorder`.
    pub fn new() -> DominatorTreePreorder {
        DominatorTreePreorder {
            nodes: EntityMap::new(),
            stack: Vec::new(),
        }
    }

    /// Recompute this data structure to match `domtree`.
    pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
        self.nodes.clear();
        debug_assert_eq!(self.stack.len(), 0);

        // Step 1: Populate the child and sibling links.
        //
        // By following the CFG post-order and pushing to the front of the lists, we make sure that
        // sibling lists are ordered according to the CFG reverse post-order.
        for &ebb in domtree.cfg_postorder() {
            if let Some(idom_inst) = domtree.idom(ebb) {
                let idom = layout.pp_ebb(idom_inst);
                // Prepend `ebb` to its idom's child list; the displaced head becomes its sibling.
                let sib = mem::replace(&mut self.nodes[idom].child, ebb.into());
                self.nodes[ebb].sibling = sib;
            } else {
                // The only EBB without an immediate dominator is the entry.
                self.stack.push(ebb);
            }
        }

        // Step 2. Assign pre-order numbers from a DFS of the dominator tree.
        debug_assert!(self.stack.len() <= 1);
        let mut n = 0;
        while let Some(ebb) = self.stack.pop() {
            n += 1;
            let node = &mut self.nodes[ebb];
            node.pre_number = n;
            node.pre_max = n;
            // Push the sibling first so the child (pushed last) is popped next, giving a
            // proper pre-order.
            if let Some(n) = node.sibling.expand() {
                self.stack.push(n);
            }
            if let Some(n) = node.child.expand() {
                self.stack.push(n);
            }
        }

        // Step 3. Propagate the `pre_max` numbers up the tree.
        // The CFG post-order is topologically ordered w.r.t. dominance so a node comes after all
        // its dominator tree children.
        for &ebb in domtree.cfg_postorder() {
            if let Some(idom_inst) = domtree.idom(ebb) {
                let idom = layout.pp_ebb(idom_inst);
                let pre_max = cmp::max(self.nodes[ebb].pre_max, self.nodes[idom].pre_max);
                self.nodes[idom].pre_max = pre_max;
            }
        }
    }
}
|
||||
|
||||
/// An iterator that enumerates the direct children of an EBB in the dominator tree.
pub struct ChildIter<'a> {
    // The pre-order structure holding the child/sibling links.
    dtpo: &'a DominatorTreePreorder,
    // The next child to yield, or `None` when exhausted.
    next: PackedOption<Ebb>,
}
|
||||
|
||||
impl<'a> Iterator for ChildIter<'a> {
|
||||
type Item = Ebb;
|
||||
|
||||
fn next(&mut self) -> Option<Ebb> {
|
||||
let n = self.next.expand();
|
||||
if let Some(ebb) = n {
|
||||
self.next = self.dtpo.nodes[ebb].sibling;
|
||||
}
|
||||
n
|
||||
}
|
||||
}
|
||||
|
||||
/// Query interface for the dominator tree pre-order.
impl DominatorTreePreorder {
    /// Get an iterator over the direct children of `ebb` in the dominator tree.
    ///
    /// These are the EBBs whose immediate dominator is an instruction in `ebb`, ordered according
    /// to the CFG reverse post-order.
    pub fn children(&self, ebb: Ebb) -> ChildIter {
        ChildIter {
            dtpo: self,
            next: self.nodes[ebb].child,
        }
    }

    /// Fast, constant time dominance check with EBB granularity.
    ///
    /// This computes the same result as `domtree.dominates(a, b)`, but in guaranteed fast constant
    /// time. This is less general than the `DominatorTree` method because it only works with EBB
    /// program points.
    ///
    /// An EBB is considered to dominate itself.
    pub fn dominates(&self, a: Ebb, b: Ebb) -> bool {
        let na = &self.nodes[a];
        let nb = &self.nodes[b];
        // `a` dominates `b` iff `b`'s pre-order number falls in `a`'s sub-tree interval
        // [pre_number, pre_max].
        na.pre_number <= nb.pre_number && na.pre_max >= nb.pre_max
    }

    /// Compare two EBBs according to the dominator pre-order.
    pub fn pre_cmp_ebb(&self, a: Ebb, b: Ebb) -> Ordering {
        self.nodes[a].pre_number.cmp(&self.nodes[b].pre_number)
    }

    /// Compare two program points according to the dominator tree pre-order.
    ///
    /// This ordering of program points has the property that given a program point, pp, all the
    /// program points dominated by pp follow immediately and contiguously after pp in the order.
    pub fn pre_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
    where
        A: Into<ExpandedProgramPoint>,
        B: Into<ExpandedProgramPoint>,
    {
        let a = a.into();
        let b = b.into();
        // Order by enclosing EBB first, then by layout position within the EBB.
        self.pre_cmp_ebb(layout.pp_ebb(a), layout.pp_ebb(b)).then(
            layout.cmp(a, b),
        )
    }

    /// Compare two value defs according to the dominator tree pre-order.
    ///
    /// Two values defined at the same program point are compared according to their parameter or
    /// result order.
    ///
    /// This is a total ordering of the values in the function.
    pub fn pre_cmp_def(&self, a: Value, b: Value, func: &Function) -> Ordering {
        let da = func.dfg.value_def(a);
        let db = func.dfg.value_def(b);
        // Tie-break values defined at the same program point by their def index.
        self.pre_cmp(da, db, &func.layout).then_with(
            || da.num().cmp(&db.num()),
        )
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use cursor::{Cursor, FuncCursor};
    use flowgraph::ControlFlowGraph;
    use ir::types::*;
    use ir::{Function, InstBuilder, TrapCode};
    use settings;
    use verifier::verify_context;

    // An empty function yields an empty post-order and an empty dominator tree.
    #[test]
    fn empty() {
        let func = Function::new();
        let cfg = ControlFlowGraph::with_function(&func);
        debug_assert!(cfg.is_valid());
        let dtree = DominatorTree::with_function(&func, &cfg);
        assert_eq!(0, dtree.nodes.keys().count());
        assert_eq!(dtree.cfg_postorder(), &[]);

        let mut dtpo = DominatorTreePreorder::new();
        dtpo.compute(&dtree, &func.layout);
    }

    // `ebb1` is never branched to, so it must be absent from the post-order and
    // must not dominate (or be dominated by) any reachable point.
    #[test]
    fn unreachable_node() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let v0 = func.dfg.append_ebb_param(ebb0, I32);
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();

        let mut cur = FuncCursor::new(&mut func);

        cur.insert_ebb(ebb0);
        cur.ins().brnz(v0, ebb2, &[]);
        cur.ins().trap(TrapCode::User(0));

        cur.insert_ebb(ebb1);
        let v1 = cur.ins().iconst(I32, 1);
        let v2 = cur.ins().iadd(v0, v1);
        cur.ins().jump(ebb0, &[v2]);

        cur.insert_ebb(ebb2);
        cur.ins().return_(&[v0]);

        let cfg = ControlFlowGraph::with_function(cur.func);
        let dt = DominatorTree::with_function(cur.func, &cfg);

        // Fall-through-first, prune-at-source DFT:
        //
        // ebb0 {
        //     brnz ebb2 {
        //         trap
        //         ebb2 {
        //             return
        //         } ebb2
        //     } brnz ebb2
        // } ebb0
        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0]);

        // `v2` is defined in the unreachable `ebb1`: dominance is false both ways.
        let v2_def = cur.func.dfg.value_def(v2).unwrap_inst();
        assert!(!dt.dominates(v2_def, ebb0, &cur.func.layout));
        assert!(!dt.dominates(ebb0, v2_def, &cur.func.layout));

        let mut dtpo = DominatorTreePreorder::new();
        dtpo.compute(&dt, &cur.func.layout);
        assert!(dtpo.dominates(ebb0, ebb0));
        assert!(!dtpo.dominates(ebb0, ebb1));
        assert!(dtpo.dominates(ebb0, ebb2));
        assert!(!dtpo.dominates(ebb1, ebb0));
        assert!(dtpo.dominates(ebb1, ebb1));
        assert!(!dtpo.dominates(ebb1, ebb2));
        assert!(!dtpo.dominates(ebb2, ebb0));
        assert!(!dtpo.dominates(ebb2, ebb1));
        assert!(dtpo.dominates(ebb2, ebb2));
    }

    // The entry block need not be the numerically first EBB; check idoms and RPO
    // comparisons for a function entered at `ebb3`.
    #[test]
    fn non_zero_entry_block() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
        let ebb3 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb3, I32);

        let mut cur = FuncCursor::new(&mut func);

        cur.insert_ebb(ebb3);
        let jmp_ebb3_ebb1 = cur.ins().jump(ebb1, &[]);

        cur.insert_ebb(ebb1);
        let br_ebb1_ebb0 = cur.ins().brnz(cond, ebb0, &[]);
        let jmp_ebb1_ebb2 = cur.ins().jump(ebb2, &[]);

        cur.insert_ebb(ebb2);
        cur.ins().jump(ebb0, &[]);

        cur.insert_ebb(ebb0);

        let cfg = ControlFlowGraph::with_function(cur.func);
        let dt = DominatorTree::with_function(cur.func, &cfg);

        // Fall-through-first, prune-at-source DFT:
        //
        // ebb3 {
        //     ebb3:jump ebb1 {
        //         ebb1 {
        //             ebb1:brnz ebb0 {
        //                 ebb1:jump ebb2 {
        //                     ebb2 {
        //                         ebb2:jump ebb0 (seen)
        //                     } ebb2
        //                 } ebb1:jump ebb2
        //                 ebb0 {
        //                 } ebb0
        //             } ebb1:brnz ebb0
        //         } ebb1
        //     } ebb3:jump ebb1
        // } ebb3

        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0, ebb1, ebb3]);

        assert_eq!(cur.func.layout.entry_block().unwrap(), ebb3);
        assert_eq!(dt.idom(ebb3), None);
        assert_eq!(dt.idom(ebb1).unwrap(), jmp_ebb3_ebb1);
        assert_eq!(dt.idom(ebb2).unwrap(), jmp_ebb1_ebb2);
        assert_eq!(dt.idom(ebb0).unwrap(), br_ebb1_ebb0);

        assert!(dt.dominates(br_ebb1_ebb0, br_ebb1_ebb0, &cur.func.layout));
        assert!(!dt.dominates(br_ebb1_ebb0, jmp_ebb3_ebb1, &cur.func.layout));
        assert!(dt.dominates(jmp_ebb3_ebb1, br_ebb1_ebb0, &cur.func.layout));

        assert_eq!(dt.rpo_cmp(ebb3, ebb3, &cur.func.layout), Ordering::Equal);
        assert_eq!(dt.rpo_cmp(ebb3, ebb1, &cur.func.layout), Ordering::Less);
        assert_eq!(
            dt.rpo_cmp(ebb3, jmp_ebb3_ebb1, &cur.func.layout),
            Ordering::Less
        );
        assert_eq!(
            dt.rpo_cmp(jmp_ebb3_ebb1, jmp_ebb1_ebb2, &cur.func.layout),
            Ordering::Less
        );
    }

    // Layout order disagrees with control flow order (ebb0 -> ebb2 -> ebb1);
    // dominance must follow control flow, not layout.
    #[test]
    fn backwards_layout() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();

        let mut cur = FuncCursor::new(&mut func);

        cur.insert_ebb(ebb0);
        let jmp02 = cur.ins().jump(ebb2, &[]);

        cur.insert_ebb(ebb1);
        let trap = cur.ins().trap(TrapCode::User(5));

        cur.insert_ebb(ebb2);
        let jmp21 = cur.ins().jump(ebb1, &[]);

        let cfg = ControlFlowGraph::with_function(cur.func);
        let dt = DominatorTree::with_function(cur.func, &cfg);

        assert_eq!(cur.func.layout.entry_block(), Some(ebb0));
        assert_eq!(dt.idom(ebb0), None);
        assert_eq!(dt.idom(ebb1), Some(jmp21));
        assert_eq!(dt.idom(ebb2), Some(jmp02));

        assert!(dt.dominates(ebb0, ebb0, &cur.func.layout));
        assert!(dt.dominates(ebb0, jmp02, &cur.func.layout));
        assert!(dt.dominates(ebb0, ebb1, &cur.func.layout));
        assert!(dt.dominates(ebb0, trap, &cur.func.layout));
        assert!(dt.dominates(ebb0, ebb2, &cur.func.layout));
        assert!(dt.dominates(ebb0, jmp21, &cur.func.layout));

        assert!(!dt.dominates(jmp02, ebb0, &cur.func.layout));
        assert!(dt.dominates(jmp02, jmp02, &cur.func.layout));
        assert!(dt.dominates(jmp02, ebb1, &cur.func.layout));
        assert!(dt.dominates(jmp02, trap, &cur.func.layout));
        assert!(dt.dominates(jmp02, ebb2, &cur.func.layout));
        assert!(dt.dominates(jmp02, jmp21, &cur.func.layout));

        assert!(!dt.dominates(ebb1, ebb0, &cur.func.layout));
        assert!(!dt.dominates(ebb1, jmp02, &cur.func.layout));
        assert!(dt.dominates(ebb1, ebb1, &cur.func.layout));
        assert!(dt.dominates(ebb1, trap, &cur.func.layout));
        assert!(!dt.dominates(ebb1, ebb2, &cur.func.layout));
        assert!(!dt.dominates(ebb1, jmp21, &cur.func.layout));

        assert!(!dt.dominates(trap, ebb0, &cur.func.layout));
        assert!(!dt.dominates(trap, jmp02, &cur.func.layout));
        assert!(!dt.dominates(trap, ebb1, &cur.func.layout));
        assert!(dt.dominates(trap, trap, &cur.func.layout));
        assert!(!dt.dominates(trap, ebb2, &cur.func.layout));
        assert!(!dt.dominates(trap, jmp21, &cur.func.layout));

        assert!(!dt.dominates(ebb2, ebb0, &cur.func.layout));
        assert!(!dt.dominates(ebb2, jmp02, &cur.func.layout));
        assert!(dt.dominates(ebb2, ebb1, &cur.func.layout));
        assert!(dt.dominates(ebb2, trap, &cur.func.layout));
        assert!(dt.dominates(ebb2, ebb2, &cur.func.layout));
        assert!(dt.dominates(ebb2, jmp21, &cur.func.layout));

        assert!(!dt.dominates(jmp21, ebb0, &cur.func.layout));
        assert!(!dt.dominates(jmp21, jmp02, &cur.func.layout));
        assert!(dt.dominates(jmp21, ebb1, &cur.func.layout));
        assert!(dt.dominates(jmp21, trap, &cur.func.layout));
        assert!(!dt.dominates(jmp21, ebb2, &cur.func.layout));
        assert!(dt.dominates(jmp21, jmp21, &cur.func.layout));
    }

    // Repeatedly split an EBB and patch the tree with `recompute_split_ebb`, forcing
    // `insert_after_rpo` to renumber; the verifier must still accept the tree.
    #[test]
    fn renumbering() {
        let mut func = Function::new();
        let entry = func.dfg.make_ebb();
        let ebb0 = func.dfg.make_ebb();
        let ebb100 = func.dfg.make_ebb();

        let mut cur = FuncCursor::new(&mut func);

        cur.insert_ebb(entry);
        cur.ins().jump(ebb0, &[]);

        cur.insert_ebb(ebb0);
        let cond = cur.ins().iconst(I32, 0);
        let inst2 = cur.ins().brz(cond, ebb0, &[]);
        let inst3 = cur.ins().brz(cond, ebb0, &[]);
        let inst4 = cur.ins().brz(cond, ebb0, &[]);
        let inst5 = cur.ins().brz(cond, ebb0, &[]);
        cur.ins().jump(ebb100, &[]);
        cur.insert_ebb(ebb100);
        cur.ins().return_(&[]);

        let mut cfg = ControlFlowGraph::with_function(cur.func);
        let mut dt = DominatorTree::with_function(cur.func, &cfg);

        let ebb1 = cur.func.dfg.make_ebb();
        cur.func.layout.split_ebb(ebb1, inst2);
        cur.goto_bottom(ebb0);
        let middle_jump_inst = cur.ins().jump(ebb1, &[]);

        dt.recompute_split_ebb(ebb0, ebb1, middle_jump_inst);

        let ebb2 = cur.func.dfg.make_ebb();
        cur.func.layout.split_ebb(ebb2, inst3);
        cur.goto_bottom(ebb1);
        let middle_jump_inst = cur.ins().jump(ebb2, &[]);
        dt.recompute_split_ebb(ebb1, ebb2, middle_jump_inst);

        let ebb3 = cur.func.dfg.make_ebb();
        cur.func.layout.split_ebb(ebb3, inst4);
        cur.goto_bottom(ebb2);
        let middle_jump_inst = cur.ins().jump(ebb3, &[]);
        dt.recompute_split_ebb(ebb2, ebb3, middle_jump_inst);

        let ebb4 = cur.func.dfg.make_ebb();
        cur.func.layout.split_ebb(ebb4, inst5);
        cur.goto_bottom(ebb3);
        let middle_jump_inst = cur.ins().jump(ebb4, &[]);
        dt.recompute_split_ebb(ebb3, ebb4, middle_jump_inst);

        cfg.compute(cur.func);

        let flags = settings::Flags::new(&settings::builder());
        verify_context(cur.func, &cfg, &dt, &flags).unwrap();
    }
}
|
||||
316
lib/codegen/src/flowgraph.rs
Normal file
316
lib/codegen/src/flowgraph.rs
Normal file
@@ -0,0 +1,316 @@
|
||||
//! A control flow graph represented as mappings of extended basic blocks to their predecessors
|
||||
//! and successors.
|
||||
//!
|
||||
//! Successors are represented as extended basic blocks while predecessors are represented by basic
|
||||
//! blocks. Basic blocks are denoted by tuples of EBB and branch/jump instructions. Each
|
||||
//! predecessor tuple corresponds to the end of a basic block.
|
||||
//!
|
||||
//! ```c
|
||||
//! Ebb0:
|
||||
//! ... ; beginning of basic block
|
||||
//!
|
||||
//! ...
|
||||
//!
|
||||
//! brz vx, Ebb1 ; end of basic block
|
||||
//!
|
||||
//! ... ; beginning of basic block
|
||||
//!
|
||||
//! ...
|
||||
//!
|
||||
//! jmp Ebb2 ; end of basic block
|
||||
//! ```
|
||||
//!
|
||||
//! Here `Ebb1` and `Ebb2` would each have a single predecessor denoted as `(Ebb0, brz)`
|
||||
//! and `(Ebb0, jmp Ebb2)` respectively.
|
||||
|
||||
use bforest;
|
||||
use entity::EntityMap;
|
||||
use ir::instructions::BranchInfo;
|
||||
use ir::{Ebb, Function, Inst};
|
||||
use std::mem;
|
||||
use timing;
|
||||
|
||||
/// A basic block denoted by its enclosing Ebb and last instruction.
pub type BasicBlock = (Ebb, Inst);
|
||||
|
||||
/// A container for the successors and predecessors of some Ebb.
#[derive(Clone, Default)]
struct CFGNode {
    /// Instructions that can branch or jump to this EBB.
    ///
    /// This maps branch instruction -> predecessor EBB which is redundant since the EBB containing
    /// the branch instruction is available from the `layout.inst_ebb()` method. We store the
    /// redundant information because:
    ///
    /// 1. Many `pred_iter()` consumers want the EBB anyway, so it is handily available.
    /// 2. The `invalidate_ebb_successors()` may be called *after* branches have been removed from
    ///    their EBB, but we still need to remove them from the old EBB predecessor map.
    ///
    /// The redundant EBB stored here is always consistent with the CFG successor lists, even after
    /// the IR has been edited.
    pub predecessors: bforest::Map<Inst, Ebb, ()>,

    /// Set of EBBs that are the targets of branches and jumps in this EBB.
    /// The set is ordered by EBB number, indicated by the `()` comparator type.
    pub successors: bforest::Set<Ebb, ()>,
}
|
||||
|
||||
/// The Control Flow Graph maintains a mapping of ebbs to their predecessors
/// and successors where predecessors are basic blocks and successors are
/// extended basic blocks.
pub struct ControlFlowGraph {
    // Per-EBB predecessor/successor sets; resized to match the function in `compute()`.
    data: EntityMap<Ebb, CFGNode>,
    // Shared node pool backing every `CFGNode::predecessors` map.
    pred_forest: bforest::MapForest<Inst, Ebb, ()>,
    // Shared node pool backing every `CFGNode::successors` set.
    succ_forest: bforest::SetForest<Ebb, ()>,
    // True once `compute()` has run since the last `clear()`; see `is_valid()`.
    valid: bool,
}
|
||||
|
||||
impl ControlFlowGraph {
    /// Allocate a new blank control flow graph.
    pub fn new() -> Self {
        Self {
            data: EntityMap::new(),
            valid: false,
            pred_forest: bforest::MapForest::new(),
            succ_forest: bforest::SetForest::new(),
        }
    }

    /// Clear all data structures in this control flow graph.
    pub fn clear(&mut self) {
        self.data.clear();
        self.pred_forest.clear();
        self.succ_forest.clear();
        self.valid = false;
    }

    /// Allocate and compute the control flow graph for `func`.
    pub fn with_function(func: &Function) -> Self {
        let mut cfg = Self::new();
        cfg.compute(func);
        cfg
    }

    /// Compute the control flow graph of `func`.
    ///
    /// This will clear and overwrite any information already stored in this data structure.
    pub fn compute(&mut self, func: &Function) {
        let _tt = timing::flowgraph();
        self.clear();
        self.data.resize(func.dfg.num_ebbs());

        for ebb in &func.layout {
            self.compute_ebb(func, ebb);
        }

        self.valid = true;
    }

    /// Scan the instructions of `ebb` and record a CFG edge for every branch or jump found,
    /// including every entry of a jump table.
    fn compute_ebb(&mut self, func: &Function, ebb: Ebb) {
        for inst in func.layout.ebb_insts(ebb) {
            match func.dfg.analyze_branch(inst) {
                BranchInfo::SingleDest(dest, _) => {
                    self.add_edge((ebb, inst), dest);
                }
                BranchInfo::Table(jt) => {
                    for (_, dest) in func.jump_tables[jt].entries() {
                        self.add_edge((ebb, inst), dest);
                    }
                }
                BranchInfo::NotABranch => {}
            }
        }
    }

    /// Remove all successor edges leaving `ebb`, and drop `ebb` from the predecessor maps of
    /// those former successors. Used by `recompute_ebb()` before re-scanning the EBB.
    fn invalidate_ebb_successors(&mut self, ebb: Ebb) {
        // Temporarily take ownership because we need mutable access to self.data inside the loop.
        // Unfortunately borrowck cannot see that our mut accesses to predecessors don't alias
        // our iteration over successors.
        let mut successors = mem::replace(&mut self.data[ebb].successors, Default::default());
        for succ in successors.iter(&self.succ_forest) {
            // Keep only predecessor entries whose stored EBB is not the invalidated one.
            self.data[succ].predecessors.retain(
                &mut self.pred_forest,
                |_, &mut e| e != ebb,
            );
        }
        successors.clear(&mut self.succ_forest);
    }

    /// Recompute the control flow graph of `ebb`.
    ///
    /// This is for use after modifying instructions within a specific EBB. It recomputes all edges
    /// from `ebb` while leaving edges to `ebb` intact. Its functionality is a subset of that of
    /// the more expensive `compute`, and should be used when we know we don't need to recompute
    /// the CFG from scratch, but rather that our changes have been restricted to specific EBBs.
    pub fn recompute_ebb(&mut self, func: &Function, ebb: Ebb) {
        debug_assert!(self.is_valid());
        self.invalidate_ebb_successors(ebb);
        self.compute_ebb(func, ebb);
    }

    /// Record the edge `from -> to` in both the successor set of the branching EBB and the
    /// predecessor map of the destination EBB.
    fn add_edge(&mut self, from: BasicBlock, to: Ebb) {
        self.data[from.0].successors.insert(
            to,
            &mut self.succ_forest,
            &(),
        );
        self.data[to].predecessors.insert(
            from.1,
            from.0,
            &mut self.pred_forest,
            &(),
        );
    }

    /// Get an iterator over the CFG predecessors to `ebb`.
    pub fn pred_iter(&self, ebb: Ebb) -> PredIter {
        PredIter(self.data[ebb].predecessors.iter(&self.pred_forest))
    }

    /// Get an iterator over the CFG successors to `ebb`.
    pub fn succ_iter(&self, ebb: Ebb) -> SuccIter {
        debug_assert!(self.is_valid());
        self.data[ebb].successors.iter(&self.succ_forest)
    }

    /// Check if the CFG is in a valid state.
    ///
    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
    /// `compute()` method has been called since the last `clear()`. It does not check that the
    /// CFG is consistent with the function.
    pub fn is_valid(&self) -> bool {
        self.valid
    }
}
|
||||
|
||||
/// An iterator over EBB predecessors. The iterator type is `BasicBlock`.
|
||||
///
|
||||
/// Each predecessor is an instruction that branches to the EBB.
|
||||
pub struct PredIter<'a>(bforest::MapIter<'a, Inst, Ebb, ()>);
|
||||
|
||||
impl<'a> Iterator for PredIter<'a> {
|
||||
type Item = BasicBlock;
|
||||
|
||||
fn next(&mut self) -> Option<BasicBlock> {
|
||||
self.0.next().map(|(i, e)| (e, i))
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over EBB successors. The iterator type is `Ebb`.
|
||||
pub type SuccIter<'a> = bforest::SetIter<'a, Ebb, ()>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use cursor::{Cursor, FuncCursor};
    use ir::{types, Function, InstBuilder};
    use std::vec::Vec;

    // A CFG can be built for an empty function.
    #[test]
    fn empty() {
        let func = Function::new();
        ControlFlowGraph::with_function(&func);
    }

    // EBBs with no branch instructions have neither predecessors nor successors.
    #[test]
    fn no_predecessors() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
        func.layout.append_ebb(ebb0);
        func.layout.append_ebb(ebb1);
        func.layout.append_ebb(ebb2);

        let cfg = ControlFlowGraph::with_function(&func);

        let mut fun_ebbs = func.layout.ebbs();
        for ebb in func.layout.ebbs() {
            assert_eq!(ebb, fun_ebbs.next().unwrap());
            assert_eq!(cfg.pred_iter(ebb).count(), 0);
            assert_eq!(cfg.succ_iter(ebb).count(), 0);
        }
    }

    // Full edge bookkeeping: conditional branches and jumps produce the expected
    // predecessor/successor sets, and `recompute_ebb` updates them after edits.
    #[test]
    fn branches_and_jumps() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb0, types::I32);
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();

        let br_ebb0_ebb2;
        let br_ebb1_ebb1;
        let jmp_ebb0_ebb1;
        let jmp_ebb1_ebb2;

        {
            let mut cur = FuncCursor::new(&mut func);

            cur.insert_ebb(ebb0);
            br_ebb0_ebb2 = cur.ins().brnz(cond, ebb2, &[]);
            jmp_ebb0_ebb1 = cur.ins().jump(ebb1, &[]);

            cur.insert_ebb(ebb1);
            br_ebb1_ebb1 = cur.ins().brnz(cond, ebb1, &[]);
            jmp_ebb1_ebb2 = cur.ins().jump(ebb2, &[]);

            cur.insert_ebb(ebb2);
        }

        let mut cfg = ControlFlowGraph::with_function(&func);

        {
            let ebb0_predecessors = cfg.pred_iter(ebb0).collect::<Vec<_>>();
            let ebb1_predecessors = cfg.pred_iter(ebb1).collect::<Vec<_>>();
            let ebb2_predecessors = cfg.pred_iter(ebb2).collect::<Vec<_>>();

            let ebb0_successors = cfg.succ_iter(ebb0).collect::<Vec<_>>();
            let ebb1_successors = cfg.succ_iter(ebb1).collect::<Vec<_>>();
            let ebb2_successors = cfg.succ_iter(ebb2).collect::<Vec<_>>();

            assert_eq!(ebb0_predecessors.len(), 0);
            assert_eq!(ebb1_predecessors.len(), 2);
            assert_eq!(ebb2_predecessors.len(), 2);

            assert_eq!(ebb1_predecessors.contains(&(ebb0, jmp_ebb0_ebb1)), true);
            assert_eq!(ebb1_predecessors.contains(&(ebb1, br_ebb1_ebb1)), true);
            assert_eq!(ebb2_predecessors.contains(&(ebb0, br_ebb0_ebb2)), true);
            assert_eq!(ebb2_predecessors.contains(&(ebb1, jmp_ebb1_ebb2)), true);

            // Successor sets are ordered by EBB number.
            assert_eq!(ebb0_successors, [ebb1, ebb2]);
            assert_eq!(ebb1_successors, [ebb1, ebb2]);
            assert_eq!(ebb2_successors, []);
        }

        // Change some instructions and recompute ebb0
        func.dfg.replace(br_ebb0_ebb2).brnz(cond, ebb1, &[]);
        func.dfg.replace(jmp_ebb0_ebb1).return_(&[]);
        cfg.recompute_ebb(&mut func, ebb0);
        // Same `Inst` entity, new destination after the replace above.
        let br_ebb0_ebb1 = br_ebb0_ebb2;

        {
            let ebb0_predecessors = cfg.pred_iter(ebb0).collect::<Vec<_>>();
            let ebb1_predecessors = cfg.pred_iter(ebb1).collect::<Vec<_>>();
            let ebb2_predecessors = cfg.pred_iter(ebb2).collect::<Vec<_>>();

            let ebb0_successors = cfg.succ_iter(ebb0);
            let ebb1_successors = cfg.succ_iter(ebb1);
            let ebb2_successors = cfg.succ_iter(ebb2);

            assert_eq!(ebb0_predecessors.len(), 0);
            assert_eq!(ebb1_predecessors.len(), 2);
            assert_eq!(ebb2_predecessors.len(), 1);

            assert_eq!(ebb1_predecessors.contains(&(ebb0, br_ebb0_ebb1)), true);
            assert_eq!(ebb1_predecessors.contains(&(ebb1, br_ebb1_ebb1)), true);
            assert_eq!(ebb2_predecessors.contains(&(ebb0, br_ebb0_ebb2)), false);
            assert_eq!(ebb2_predecessors.contains(&(ebb1, jmp_ebb1_ebb2)), true);

            assert_eq!(ebb0_successors.collect::<Vec<_>>(), [ebb1]);
            assert_eq!(ebb1_successors.collect::<Vec<_>>(), [ebb1, ebb2]);
            assert_eq!(ebb2_successors.collect::<Vec<_>>(), []);
        }
    }
}
|
||||
266
lib/codegen/src/ir/builder.rs
Normal file
266
lib/codegen/src/ir/builder.rs
Normal file
@@ -0,0 +1,266 @@
|
||||
//! Cretonne instruction builder.
|
||||
//!
|
||||
//! A `Builder` provides a convenient interface for inserting instructions into a Cretonne
|
||||
//! function. Many of its methods are generated from the meta language instruction definitions.
|
||||
|
||||
use ir;
|
||||
use ir::types;
|
||||
use ir::{DataFlowGraph, InstructionData};
|
||||
use ir::{Inst, Opcode, Type, Value};
|
||||
use isa;
|
||||
|
||||
/// Base trait for instruction builders.
///
/// The `InstBuilderBase` trait provides the basic functionality required by the methods of the
/// generated `InstBuilder` trait. These methods should not normally be used directly. Use the
/// methods in the `InstBuilder` trait instead.
///
/// Any data type that implements `InstBuilderBase` also gets all the methods of the `InstBuilder`
/// trait.
pub trait InstBuilderBase<'f>: Sized {
    /// Get an immutable reference to the data flow graph that will hold the constructed
    /// instructions.
    fn data_flow_graph(&self) -> &DataFlowGraph;
    /// Get a mutable reference to the data flow graph that will hold the constructed
    /// instructions.
    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;

    /// Insert an instruction and return a reference to it, consuming the builder.
    ///
    /// The result types may depend on a controlling type variable. For non-polymorphic
    /// instructions with multiple results, pass `VOID` for the `ctrl_typevar` argument.
    ///
    /// Returns the new instruction along with a mutable reference to the data flow graph so
    /// callers can continue working with the DFG after the builder is consumed.
    fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph);
}
|
||||
|
||||
// Include trait code generated by `lib/codegen/meta/gen_instr.py`.
//
// This file defines the `InstBuilder` trait as an extension of `InstBuilderBase` with methods per
// instruction format and per opcode.
include!(concat!(env!("OUT_DIR"), "/inst_builder.rs"));

/// Any type implementing `InstBuilderBase` gets all the `InstBuilder` methods for free.
// Blanket impl with an empty body: all `InstBuilder` methods are provided in terms of
// `InstBuilderBase` by the generated code above.
impl<'f, T: InstBuilderBase<'f>> InstBuilder<'f> for T {}
|
||||
|
||||
/// Base trait for instruction inserters.
///
/// This is an alternative base trait for an instruction builder to implement.
///
/// An instruction inserter can be adapted into an instruction builder by wrapping it in an
/// `InsertBuilder`. This provides some common functionality for instruction builders that insert
/// new instructions, as opposed to the `ReplaceBuilder` which overwrites existing instructions.
pub trait InstInserterBase<'f>: Sized {
    /// Get an immutable reference to the data flow graph.
    fn data_flow_graph(&self) -> &DataFlowGraph;

    /// Get a mutable reference to the data flow graph.
    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;

    /// Insert a new instruction which belongs to the DFG.
    ///
    /// Consumes the inserter and hands back the data flow graph reference.
    fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph;
}
|
||||
|
||||
use std::marker::PhantomData;

/// Builder that inserts an instruction at the current position.
///
/// An `InsertBuilder` is a wrapper for an `InstInserterBase` that turns it into an instruction
/// builder with some additional facilities for creating instructions that reuse existing values as
/// their results.
pub struct InsertBuilder<'f, IIB: InstInserterBase<'f>> {
    inserter: IIB,
    // `PhantomData` anchors the `'f` lifetime parameter, which is otherwise unused in the
    // struct fields themselves.
    unused: PhantomData<&'f u32>,
}
|
||||
|
||||
impl<'f, IIB: InstInserterBase<'f>> InsertBuilder<'f, IIB> {
    /// Create a new builder which inserts instructions at `pos`.
    /// The `dfg` and `pos.layout` references should be from the same `Function`.
    pub fn new(inserter: IIB) -> InsertBuilder<'f, IIB> {
        InsertBuilder {
            inserter,
            unused: PhantomData,
        }
    }

    /// Reuse result values in `reuse`.
    ///
    /// Convert this builder into one that will reuse the provided result values instead of
    /// allocating new ones. The provided values for reuse must not be attached to anything. Any
    /// missing result values will be allocated as normal.
    ///
    /// The `reuse` argument is expected to be an array of `Option<Value>`.
    pub fn with_results<Array>(self, reuse: Array) -> InsertReuseBuilder<'f, IIB, Array>
    where
        Array: AsRef<[Option<Value>]>,
    {
        InsertReuseBuilder {
            inserter: self.inserter,
            reuse,
            unused: PhantomData,
        }
    }

    /// Reuse a single result value.
    ///
    /// Convert this into a builder that will reuse `v` as the single result value. The reused
    /// result value `v` must not be attached to anything.
    ///
    /// This method should only be used when building an instruction with exactly one result. Use
    /// `with_results()` for the more general case.
    pub fn with_result(self, v: Value) -> InsertReuseBuilder<'f, IIB, [Option<Value>; 1]> {
        // TODO: Specialize this to return a different builder that just attaches `v` instead of
        // calling `make_inst_results_reusing()`.
        self.with_results([Some(v)])
    }
}
|
||||
|
||||
impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, IIB> {
|
||||
fn data_flow_graph(&self) -> &DataFlowGraph {
|
||||
self.inserter.data_flow_graph()
|
||||
}
|
||||
|
||||
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
|
||||
self.inserter.data_flow_graph_mut()
|
||||
}
|
||||
|
||||
fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
|
||||
let inst;
|
||||
{
|
||||
let dfg = self.inserter.data_flow_graph_mut();
|
||||
inst = dfg.make_inst(data);
|
||||
dfg.make_inst_results(inst, ctrl_typevar);
|
||||
}
|
||||
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder that inserts a new instruction like `InsertBuilder`, but reusing result values.
pub struct InsertReuseBuilder<'f, IIB, Array>
where
    IIB: InstInserterBase<'f>,
    Array: AsRef<[Option<Value>]>,
{
    inserter: IIB,
    // Values to reuse as instruction results; `None` entries are allocated as normal.
    reuse: Array,
    // Anchors the otherwise unused `'f` lifetime parameter.
    unused: PhantomData<&'f u32>,
}
|
||||
|
||||
impl<'f, IIB, Array> InstBuilderBase<'f> for InsertReuseBuilder<'f, IIB, Array>
where
    IIB: InstInserterBase<'f>,
    Array: AsRef<[Option<Value>]>,
{
    fn data_flow_graph(&self) -> &DataFlowGraph {
        self.inserter.data_flow_graph()
    }

    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
        self.inserter.data_flow_graph_mut()
    }

    fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
        let inst;
        {
            let dfg = self.inserter.data_flow_graph_mut();
            inst = dfg.make_inst(data);
            // Make an `Iterator<Item = Option<Value>>`.
            let ru = self.reuse.as_ref().iter().cloned();
            dfg.make_inst_results_reusing(inst, ctrl_typevar, ru);
        }
        (inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
    }
}
|
||||
|
||||
/// Instruction builder that replaces an existing instruction.
///
/// The inserted instruction will have the same `Inst` number as the old one.
///
/// If the old instruction still has result values attached, it is assumed that the new instruction
/// produces the same number and types of results. The old result values are preserved. If the
/// replacement instruction format does not support multiple results, the builder panics. It is a
/// bug to leave result values dangling.
pub struct ReplaceBuilder<'f> {
    dfg: &'f mut DataFlowGraph,
    // The existing instruction to overwrite.
    inst: Inst,
}

impl<'f> ReplaceBuilder<'f> {
    /// Create a `ReplaceBuilder` that will overwrite `inst`.
    pub fn new(dfg: &'f mut DataFlowGraph, inst: Inst) -> ReplaceBuilder {
        ReplaceBuilder { dfg, inst }
    }
}
|
||||
|
||||
impl<'f> InstBuilderBase<'f> for ReplaceBuilder<'f> {
    fn data_flow_graph(&self) -> &DataFlowGraph {
        self.dfg
    }

    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
        self.dfg
    }

    fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
        // Splat the new instruction on top of the old one.
        self.dfg[self.inst] = data;

        if !self.dfg.has_results(self.inst) {
            // The old result values were either detached or non-existent.
            // Construct new ones.
            self.dfg.make_inst_results(self.inst, ctrl_typevar);
        }

        // The replaced instruction keeps its original `Inst` number.
        (self.inst, self.dfg)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use cursor::{Cursor, FuncCursor};
    use ir::condcodes::*;
    use ir::types::*;
    use ir::{Function, InstBuilder, ValueDef};

    // Result types are assigned correctly whether given explicitly, inferred from inputs,
    // or computed by a type formula (e.g. `icmp` produces `B1`).
    #[test]
    fn types() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let arg0 = func.dfg.append_ebb_param(ebb0, I32);
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_ebb(ebb0);

        // Explicit types.
        let v0 = pos.ins().iconst(I32, 3);
        assert_eq!(pos.func.dfg.value_type(v0), I32);

        // Inferred from inputs.
        let v1 = pos.ins().iadd(arg0, v0);
        assert_eq!(pos.func.dfg.value_type(v1), I32);

        // Formula.
        let cmp = pos.ins().icmp(IntCC::Equal, arg0, v0);
        assert_eq!(pos.func.dfg.value_type(cmp), B1);
    }

    // `with_result()` lets a new instruction take over a detached result value.
    #[test]
    fn reuse_results() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let arg0 = func.dfg.append_ebb_param(ebb0, I32);
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_ebb(ebb0);

        let v0 = pos.ins().iadd_imm(arg0, 17);
        assert_eq!(pos.func.dfg.value_type(v0), I32);
        let iadd = pos.prev_inst().unwrap();
        assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iadd, 0));

        // Detach v0 and reuse it for a different instruction.
        pos.func.dfg.clear_results(iadd);
        let v0b = pos.ins().with_result(v0).iconst(I32, 3);
        assert_eq!(v0, v0b);
        assert_eq!(pos.current_inst(), Some(iadd));
        let iconst = pos.prev_inst().unwrap();
        assert!(iadd != iconst);
        assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iconst, 0));
    }
}
|
||||
358
lib/codegen/src/ir/condcodes.rs
Normal file
358
lib/codegen/src/ir/condcodes.rs
Normal file
@@ -0,0 +1,358 @@
|
||||
//! Condition codes for the Cretonne code generator.
|
||||
//!
|
||||
//! A condition code here is an enumerated type that determined how to compare two numbers. There
|
||||
//! are different rules for comparing integers and floating point numbers, so they use different
|
||||
//! condition codes.
|
||||
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Common traits of condition codes.
pub trait CondCode: Copy {
    /// Get the inverse condition code of `self`.
    ///
    /// The inverse condition code produces the opposite result for all comparisons.
    /// That is, `cmp CC, x, y` is true if and only if `cmp CC.inverse(), x, y` is false.
    #[must_use]
    fn inverse(self) -> Self;

    /// Get the reversed condition code for `self`.
    ///
    /// The reversed condition code produces the same result as swapping `x` and `y` in the
    /// comparison. That is, `cmp CC, x, y` is the same as `cmp CC.reverse(), y, x`.
    #[must_use]
    fn reverse(self) -> Self;
}

/// Condition code for comparing integers.
///
/// This condition code is used by the `icmp` instruction to compare integer values. There are
/// separate codes for comparing the integers as signed or unsigned numbers where it makes a
/// difference.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum IntCC {
    /// `==`.
    Equal,
    /// `!=`.
    NotEqual,
    /// Signed `<`.
    SignedLessThan,
    /// Signed `>=`.
    SignedGreaterThanOrEqual,
    /// Signed `>`.
    SignedGreaterThan,
    /// Signed `<=`.
    SignedLessThanOrEqual,
    /// Unsigned `<`.
    UnsignedLessThan,
    /// Unsigned `>=`.
    UnsignedGreaterThanOrEqual,
    /// Unsigned `>`.
    UnsignedGreaterThan,
    /// Unsigned `<=`.
    UnsignedLessThanOrEqual,
}

impl CondCode for IntCC {
    fn inverse(self) -> Self {
        use self::IntCC::*;
        // Logical negation: `<` pairs with `>=`, `>` pairs with `<=`, and `==` with `!=`,
        // within the same signedness.
        match self {
            Equal => NotEqual,
            NotEqual => Equal,
            SignedLessThan => SignedGreaterThanOrEqual,
            SignedGreaterThanOrEqual => SignedLessThan,
            SignedGreaterThan => SignedLessThanOrEqual,
            SignedLessThanOrEqual => SignedGreaterThan,
            UnsignedLessThan => UnsignedGreaterThanOrEqual,
            UnsignedGreaterThanOrEqual => UnsignedLessThan,
            UnsignedGreaterThan => UnsignedLessThanOrEqual,
            UnsignedLessThanOrEqual => UnsignedGreaterThan,
        }
    }

    fn reverse(self) -> Self {
        use self::IntCC::*;
        // Operand swap: `<` becomes `>`, `<=` becomes `>=`, and the symmetric codes
        // (`==`, `!=`) map to themselves.
        match self {
            Equal => Equal,
            NotEqual => NotEqual,
            SignedLessThan => SignedGreaterThan,
            SignedGreaterThanOrEqual => SignedLessThanOrEqual,
            SignedGreaterThan => SignedLessThan,
            SignedLessThanOrEqual => SignedGreaterThanOrEqual,
            UnsignedLessThan => UnsignedGreaterThan,
            UnsignedGreaterThanOrEqual => UnsignedLessThanOrEqual,
            UnsignedGreaterThan => UnsignedLessThan,
            UnsignedLessThanOrEqual => UnsignedGreaterThanOrEqual,
        }
    }
}

impl Display for IntCC {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        use self::IntCC::*;
        // Textual mnemonics as used in the textual IR format.
        let mnemonic = match *self {
            Equal => "eq",
            NotEqual => "ne",
            SignedLessThan => "slt",
            SignedGreaterThanOrEqual => "sge",
            SignedGreaterThan => "sgt",
            SignedLessThanOrEqual => "sle",
            UnsignedLessThan => "ult",
            UnsignedGreaterThanOrEqual => "uge",
            UnsignedGreaterThan => "ugt",
            UnsignedLessThanOrEqual => "ule",
        };
        f.write_str(mnemonic)
    }
}

impl FromStr for IntCC {
    type Err = ();

    fn from_str(mnemonic: &str) -> Result<Self, Self::Err> {
        use self::IntCC::*;
        // Exact inverse of the `Display` mapping above.
        let parsed = match mnemonic {
            "eq" => Equal,
            "ne" => NotEqual,
            "slt" => SignedLessThan,
            "sge" => SignedGreaterThanOrEqual,
            "sgt" => SignedGreaterThan,
            "sle" => SignedLessThanOrEqual,
            "ult" => UnsignedLessThan,
            "uge" => UnsignedGreaterThanOrEqual,
            "ugt" => UnsignedGreaterThan,
            "ule" => UnsignedLessThanOrEqual,
            _ => return Err(()),
        };
        Ok(parsed)
    }
}
|
||||
|
||||
/// Condition code for comparing floating point numbers.
///
/// This condition code is used by the `fcmp` instruction to compare floating point values. Two
/// IEEE floating point values relate in exactly one of four ways:
///
/// 1. `UN` - unordered when either value is NaN.
/// 2. `EQ` - equal numerical value.
/// 3. `LT` - `x` is less than `y`.
/// 4. `GT` - `x` is greater than `y`.
///
/// Note that `0.0` and `-0.0` relate as `EQ` because they both represent the number 0.
///
/// The condition codes described here are used to produce a single boolean value from the
/// comparison. The 14 condition codes here cover every possible combination of the relation above
/// except the impossible `!UN & !EQ & !LT & !GT` and the always true `UN | EQ | LT | GT`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum FloatCC {
    /// EQ | LT | GT
    Ordered,
    /// UN
    Unordered,

    /// EQ
    Equal,
    /// The C '!=' operator is the inverse of '==': `NotEqual`.
    /// UN | LT | GT
    NotEqual,
    /// LT | GT
    OrderedNotEqual,
    /// UN | EQ
    UnorderedOrEqual,

    /// LT
    LessThan,
    /// LT | EQ
    LessThanOrEqual,
    /// GT
    GreaterThan,
    /// GT | EQ
    GreaterThanOrEqual,

    /// UN | LT
    UnorderedOrLessThan,
    /// UN | LT | EQ
    UnorderedOrLessThanOrEqual,
    /// UN | GT
    UnorderedOrGreaterThan,
    /// UN | GT | EQ
    UnorderedOrGreaterThanOrEqual,
}

impl CondCode for FloatCC {
    // The inverse contains exactly the relations (UN/EQ/LT/GT) that `self` does not.
    fn inverse(self) -> Self {
        use self::FloatCC::*;
        match self {
            Ordered => Unordered,
            Unordered => Ordered,
            Equal => NotEqual,
            NotEqual => Equal,
            OrderedNotEqual => UnorderedOrEqual,
            UnorderedOrEqual => OrderedNotEqual,
            LessThan => UnorderedOrGreaterThanOrEqual,
            LessThanOrEqual => UnorderedOrGreaterThan,
            GreaterThan => UnorderedOrLessThanOrEqual,
            GreaterThanOrEqual => UnorderedOrLessThan,
            UnorderedOrLessThan => GreaterThanOrEqual,
            UnorderedOrLessThanOrEqual => GreaterThan,
            UnorderedOrGreaterThan => LessThanOrEqual,
            UnorderedOrGreaterThanOrEqual => LessThan,
        }
    }
    // Swapping operands exchanges LT and GT while UN and EQ are unaffected.
    fn reverse(self) -> Self {
        use self::FloatCC::*;
        match self {
            Ordered => Ordered,
            Unordered => Unordered,
            Equal => Equal,
            NotEqual => NotEqual,
            OrderedNotEqual => OrderedNotEqual,
            UnorderedOrEqual => UnorderedOrEqual,
            LessThan => GreaterThan,
            LessThanOrEqual => GreaterThanOrEqual,
            GreaterThan => LessThan,
            GreaterThanOrEqual => LessThanOrEqual,
            UnorderedOrLessThan => UnorderedOrGreaterThan,
            UnorderedOrLessThanOrEqual => UnorderedOrGreaterThanOrEqual,
            UnorderedOrGreaterThan => UnorderedOrLessThan,
            UnorderedOrGreaterThanOrEqual => UnorderedOrLessThanOrEqual,
        }
    }
}

impl Display for FloatCC {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        use self::FloatCC::*;
        // Textual mnemonics as used in the textual IR format.
        f.write_str(match *self {
            Ordered => "ord",
            Unordered => "uno",
            Equal => "eq",
            NotEqual => "ne",
            OrderedNotEqual => "one",
            UnorderedOrEqual => "ueq",
            LessThan => "lt",
            LessThanOrEqual => "le",
            GreaterThan => "gt",
            GreaterThanOrEqual => "ge",
            UnorderedOrLessThan => "ult",
            UnorderedOrLessThanOrEqual => "ule",
            UnorderedOrGreaterThan => "ugt",
            UnorderedOrGreaterThanOrEqual => "uge",
        })
    }
}

impl FromStr for FloatCC {
    type Err = ();

    // Exact inverse of the `Display` mapping above.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use self::FloatCC::*;
        match s {
            "ord" => Ok(Ordered),
            "uno" => Ok(Unordered),
            "eq" => Ok(Equal),
            "ne" => Ok(NotEqual),
            "one" => Ok(OrderedNotEqual),
            "ueq" => Ok(UnorderedOrEqual),
            "lt" => Ok(LessThan),
            "le" => Ok(LessThanOrEqual),
            "gt" => Ok(GreaterThan),
            "ge" => Ok(GreaterThanOrEqual),
            "ult" => Ok(UnorderedOrLessThan),
            "ule" => Ok(UnorderedOrLessThanOrEqual),
            "ugt" => Ok(UnorderedOrGreaterThan),
            "uge" => Ok(UnorderedOrGreaterThanOrEqual),
            _ => Err(()),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    // Every `IntCC` variant, for exhaustive round-trip checks.
    static INT_ALL: [IntCC; 10] = [
        IntCC::Equal,
        IntCC::NotEqual,
        IntCC::SignedLessThan,
        IntCC::SignedGreaterThanOrEqual,
        IntCC::SignedGreaterThan,
        IntCC::SignedLessThanOrEqual,
        IntCC::UnsignedLessThan,
        IntCC::UnsignedGreaterThanOrEqual,
        IntCC::UnsignedGreaterThan,
        IntCC::UnsignedLessThanOrEqual,
    ];

    // `inverse()` is an involution and never maps a code to itself.
    #[test]
    fn int_inverse() {
        for r in &INT_ALL {
            let cc = *r;
            let inv = cc.inverse();
            assert!(cc != inv);
            assert_eq!(inv.inverse(), cc);
        }
    }

    // `reverse()` is an involution (it may map a symmetric code to itself).
    #[test]
    fn int_reverse() {
        for r in &INT_ALL {
            let cc = *r;
            let rev = cc.reverse();
            assert_eq!(rev.reverse(), cc);
        }
    }

    // `Display` and `FromStr` are exact inverses; unknown mnemonics fail.
    #[test]
    fn int_display() {
        for r in &INT_ALL {
            let cc = *r;
            assert_eq!(cc.to_string().parse(), Ok(cc));
        }
        assert_eq!("bogus".parse::<IntCC>(), Err(()));
    }

    // Every `FloatCC` variant, for exhaustive round-trip checks.
    static FLOAT_ALL: [FloatCC; 14] = [
        FloatCC::Ordered,
        FloatCC::Unordered,
        FloatCC::Equal,
        FloatCC::NotEqual,
        FloatCC::OrderedNotEqual,
        FloatCC::UnorderedOrEqual,
        FloatCC::LessThan,
        FloatCC::LessThanOrEqual,
        FloatCC::GreaterThan,
        FloatCC::GreaterThanOrEqual,
        FloatCC::UnorderedOrLessThan,
        FloatCC::UnorderedOrLessThanOrEqual,
        FloatCC::UnorderedOrGreaterThan,
        FloatCC::UnorderedOrGreaterThanOrEqual,
    ];

    // `inverse()` is an involution and never maps a code to itself.
    #[test]
    fn float_inverse() {
        for r in &FLOAT_ALL {
            let cc = *r;
            let inv = cc.inverse();
            assert!(cc != inv);
            assert_eq!(inv.inverse(), cc);
        }
    }

    // `reverse()` is an involution (it may map a symmetric code to itself).
    #[test]
    fn float_reverse() {
        for r in &FLOAT_ALL {
            let cc = *r;
            let rev = cc.reverse();
            assert_eq!(rev.reverse(), cc);
        }
    }

    // `Display` and `FromStr` are exact inverses; unknown mnemonics fail.
    #[test]
    fn float_display() {
        for r in &FLOAT_ALL {
            let cc = *r;
            assert_eq!(cc.to_string().parse(), Ok(cc));
        }
        assert_eq!("bogus".parse::<FloatCC>(), Err(()));
    }
}
|
||||
1191
lib/codegen/src/ir/dfg.rs
Normal file
1191
lib/codegen/src/ir/dfg.rs
Normal file
File diff suppressed because it is too large
Load Diff
286
lib/codegen/src/ir/entities.rs
Normal file
286
lib/codegen/src/ir/entities.rs
Normal file
@@ -0,0 +1,286 @@
|
||||
//! Cretonne IR entity references.
|
||||
//!
|
||||
//! Instructions in Cretonne IR need to reference other entities in the function. This can be other
|
||||
//! parts of the function like extended basic blocks or stack slots, or it can be external entities
|
||||
//! that are declared in the function preamble in the text format.
|
||||
//!
|
||||
//! These entity references in instruction operands are not implemented as Rust references both
|
||||
//! because Rust's ownership and mutability rules make it difficult, and because 64-bit pointers
|
||||
//! take up a lot of space, and we want a compact in-memory representation. Instead, entity
|
||||
//! references are structs wrapping a `u32` index into a table in the `Function` main data
|
||||
//! structure. There is a separate index type for each entity type, so we don't lose type safety.
|
||||
//!
|
||||
//! The `entities` module defines public types for the entity references along with constants
|
||||
//! representing an invalid reference. We prefer to use `Option<EntityRef>` whenever possible, but
|
||||
//! unfortunately that type is twice as large as the 32-bit index type on its own. Thus, compact
|
||||
//! data structures use the `PackedOption<EntityRef>` representation, while function arguments and
|
||||
//! return values prefer the more Rust-like `Option<EntityRef>` variant.
|
||||
//!
|
||||
//! The entity references all implement the `Display` trait in a way that matches the textual IR
|
||||
//! format.
|
||||
|
||||
use std::fmt;
|
||||
use std::u32;
|
||||
|
||||
/// An opaque reference to an extended basic block in a function.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub struct Ebb(u32);
|
||||
entity_impl!(Ebb, "ebb");
|
||||
|
||||
impl Ebb {
|
||||
/// Create a new EBB reference from its number. This corresponds to the `ebbNN` representation.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<Ebb> {
|
||||
if n < u32::MAX { Some(Ebb(n)) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to an SSA value.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub struct Value(u32);
|
||||
entity_impl!(Value, "v");
|
||||
|
||||
impl Value {
|
||||
/// Create a value from its number representation.
|
||||
/// This is the number in the `vNN` notation.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<Value> {
|
||||
if n < u32::MAX / 2 {
|
||||
Some(Value(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to an instruction in a function.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub struct Inst(u32);
|
||||
entity_impl!(Inst, "inst");
|
||||
|
||||
/// An opaque reference to a stack slot.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct StackSlot(u32);
|
||||
entity_impl!(StackSlot, "ss");
|
||||
|
||||
impl StackSlot {
|
||||
/// Create a new stack slot reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<StackSlot> {
|
||||
if n < u32::MAX {
|
||||
Some(StackSlot(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to a global variable.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct GlobalVar(u32);
|
||||
entity_impl!(GlobalVar, "gv");
|
||||
|
||||
impl GlobalVar {
|
||||
/// Create a new global variable reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<GlobalVar> {
|
||||
if n < u32::MAX {
|
||||
Some(GlobalVar(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to a jump table.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct JumpTable(u32);
|
||||
entity_impl!(JumpTable, "jt");
|
||||
|
||||
impl JumpTable {
|
||||
/// Create a new jump table reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<JumpTable> {
|
||||
if n < u32::MAX {
|
||||
Some(JumpTable(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to an external function.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct FuncRef(u32);
|
||||
entity_impl!(FuncRef, "fn");
|
||||
|
||||
impl FuncRef {
|
||||
/// Create a new external function reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<FuncRef> {
|
||||
if n < u32::MAX { Some(FuncRef(n)) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to a function signature.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SigRef(u32);
|
||||
entity_impl!(SigRef, "sig");
|
||||
|
||||
impl SigRef {
|
||||
/// Create a new function signature reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<SigRef> {
|
||||
if n < u32::MAX { Some(SigRef(n)) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to a heap.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Heap(u32);
|
||||
entity_impl!(Heap, "heap");
|
||||
|
||||
impl Heap {
|
||||
/// Create a new heap reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<Heap> {
|
||||
if n < u32::MAX { Some(Heap(n)) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to any of the entities defined in this module.
///
/// Useful for error reporting and other contexts that need to point at an
/// arbitrary part of a function.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub enum AnyEntity {
    /// The whole function.
    Function,
    /// An extended basic block.
    Ebb(Ebb),
    /// An instruction.
    Inst(Inst),
    /// An SSA value.
    Value(Value),
    /// A stack slot.
    StackSlot(StackSlot),
    /// A global variable.
    GlobalVar(GlobalVar),
    /// A jump table.
    JumpTable(JumpTable),
    /// An external function.
    FuncRef(FuncRef),
    /// A function call signature.
    SigRef(SigRef),
    /// A heap.
    Heap(Heap),
}
|
||||
|
||||
impl fmt::Display for AnyEntity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
AnyEntity::Function => write!(f, "function"),
|
||||
AnyEntity::Ebb(r) => r.fmt(f),
|
||||
AnyEntity::Inst(r) => r.fmt(f),
|
||||
AnyEntity::Value(r) => r.fmt(f),
|
||||
AnyEntity::StackSlot(r) => r.fmt(f),
|
||||
AnyEntity::GlobalVar(r) => r.fmt(f),
|
||||
AnyEntity::JumpTable(r) => r.fmt(f),
|
||||
AnyEntity::FuncRef(r) => r.fmt(f),
|
||||
AnyEntity::SigRef(r) => r.fmt(f),
|
||||
AnyEntity::Heap(r) => r.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for AnyEntity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
(self as &fmt::Display).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Ebb> for AnyEntity {
|
||||
fn from(r: Ebb) -> AnyEntity {
|
||||
AnyEntity::Ebb(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Inst> for AnyEntity {
|
||||
fn from(r: Inst) -> AnyEntity {
|
||||
AnyEntity::Inst(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Value> for AnyEntity {
|
||||
fn from(r: Value) -> AnyEntity {
|
||||
AnyEntity::Value(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StackSlot> for AnyEntity {
|
||||
fn from(r: StackSlot) -> AnyEntity {
|
||||
AnyEntity::StackSlot(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GlobalVar> for AnyEntity {
|
||||
fn from(r: GlobalVar) -> AnyEntity {
|
||||
AnyEntity::GlobalVar(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<JumpTable> for AnyEntity {
|
||||
fn from(r: JumpTable) -> AnyEntity {
|
||||
AnyEntity::JumpTable(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FuncRef> for AnyEntity {
|
||||
fn from(r: FuncRef) -> AnyEntity {
|
||||
AnyEntity::FuncRef(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SigRef> for AnyEntity {
|
||||
fn from(r: SigRef) -> AnyEntity {
|
||||
AnyEntity::SigRef(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Heap> for AnyEntity {
|
||||
fn from(r: Heap) -> AnyEntity {
|
||||
AnyEntity::Heap(r)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;
    use std::u32;

    #[test]
    fn value_with_number() {
        // Value references display in `vNN` form.
        assert_eq!(Value::with_number(0).unwrap().to_string(), "v0");
        assert_eq!(Value::with_number(1).unwrap().to_string(), "v1");

        // Only numbers below `u32::MAX / 2` are valid value references.
        assert_eq!(Value::with_number(u32::MAX / 2), None);
        assert!(Value::with_number(u32::MAX / 2 - 1).is_some());
    }

    #[test]
    fn memory() {
        use packed_option::PackedOption;
        use std::mem;
        // This is the whole point of `PackedOption`: the `None` case fits in
        // the reserved index, so the option adds no size.
        assert_eq!(
            mem::size_of::<Value>(),
            mem::size_of::<PackedOption<Value>>()
        );
    }
}
|
||||
456
lib/codegen/src/ir/extfunc.rs
Normal file
456
lib/codegen/src/ir/extfunc.rs
Normal file
@@ -0,0 +1,456 @@
|
||||
//! External function calls.
|
||||
//!
|
||||
//! To a Cretonne function, all functions are "external". Directly called functions must be
|
||||
//! declared in the preamble, and all function calls must have a signature.
|
||||
//!
|
||||
//! This module declares the data types used to represent external functions and call signatures.
|
||||
|
||||
use ir::{ArgumentLoc, ExternalName, SigRef, Type};
|
||||
use isa::{RegInfo, RegUnit};
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Function signature.
///
/// The function signature describes the types of formal parameters and return values along with
/// other details that are needed to call a function correctly.
///
/// A signature can optionally include ISA-specific ABI information which specifies exactly how
/// arguments and return values are passed.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Signature {
    /// The arguments passed to the function.
    pub params: Vec<AbiParam>,
    /// Values returned from the function.
    pub returns: Vec<AbiParam>,

    /// Calling convention.
    pub call_conv: CallConv,

    /// When the signature has been legalized to a specific ISA, this holds the size of the
    /// argument array on the stack. Before legalization, this is `None`.
    ///
    /// This can be computed from the legalized `params` array as the maximum (offset plus
    /// byte size) of the `ArgumentLoc::Stack(offset)` argument.
    pub argument_bytes: Option<u32>,
}
|
||||
|
||||
impl Signature {
|
||||
/// Create a new blank signature.
|
||||
pub fn new(call_conv: CallConv) -> Self {
|
||||
Self {
|
||||
params: Vec::new(),
|
||||
returns: Vec::new(),
|
||||
call_conv,
|
||||
argument_bytes: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear the signature so it is identical to a fresh one returned by `new()`.
|
||||
pub fn clear(&mut self, call_conv: CallConv) {
|
||||
self.params.clear();
|
||||
self.returns.clear();
|
||||
self.call_conv = call_conv;
|
||||
self.argument_bytes = None;
|
||||
}
|
||||
|
||||
/// Compute the size of the stack arguments and mark signature as legalized.
|
||||
///
|
||||
/// Even if there are no stack arguments, this will set `params` to `Some(0)` instead
|
||||
/// of `None`. This indicates that the signature has been legalized.
|
||||
pub fn compute_argument_bytes(&mut self) {
|
||||
let bytes = self.params
|
||||
.iter()
|
||||
.filter_map(|arg| match arg.location {
|
||||
ArgumentLoc::Stack(offset) if offset >= 0 => {
|
||||
Some(offset as u32 + arg.value_type.bytes())
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.fold(0, cmp::max);
|
||||
self.argument_bytes = Some(bytes);
|
||||
}
|
||||
|
||||
/// Return an object that can display `self` with correct register names.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplaySignature<'a> {
|
||||
DisplaySignature(self, regs.into())
|
||||
}
|
||||
|
||||
/// Find the index of a presumed unique special-purpose parameter.
|
||||
pub fn special_param_index(&self, purpose: ArgumentPurpose) -> Option<usize> {
|
||||
self.params.iter().rposition(|arg| arg.purpose == purpose)
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type capable of displaying a `Signature` with correct register names.
|
||||
pub struct DisplaySignature<'a>(&'a Signature, Option<&'a RegInfo>);
|
||||
|
||||
fn write_list(f: &mut fmt::Formatter, args: &[AbiParam], regs: Option<&RegInfo>) -> fmt::Result {
|
||||
match args.split_first() {
|
||||
None => {}
|
||||
Some((first, rest)) => {
|
||||
write!(f, "{}", first.display(regs))?;
|
||||
for arg in rest {
|
||||
write!(f, ", {}", arg.display(regs))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for DisplaySignature<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "(")?;
|
||||
write_list(f, &self.0.params, self.1)?;
|
||||
write!(f, ")")?;
|
||||
if !self.0.returns.is_empty() {
|
||||
write!(f, " -> ")?;
|
||||
write_list(f, &self.0.returns, self.1)?;
|
||||
}
|
||||
write!(f, " {}", self.0.call_conv)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Signature {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.display(None).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Function parameter or return value descriptor.
///
/// This describes the value type being passed to or from a function along with flags that affect
/// how the argument is passed.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct AbiParam {
    /// Type of the argument value.
    pub value_type: Type,
    /// Special purpose of argument, or `Normal`.
    pub purpose: ArgumentPurpose,
    /// Method for extending argument to a full register.
    pub extension: ArgumentExtension,

    /// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
    /// been legalized.
    pub location: ArgumentLoc,
}
|
||||
|
||||
impl AbiParam {
|
||||
/// Create a parameter with default flags.
|
||||
pub fn new(vt: Type) -> Self {
|
||||
Self {
|
||||
value_type: vt,
|
||||
extension: ArgumentExtension::None,
|
||||
purpose: ArgumentPurpose::Normal,
|
||||
location: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a special-purpose parameter that is not (yet) bound to a specific register.
|
||||
pub fn special(vt: Type, purpose: ArgumentPurpose) -> Self {
|
||||
Self {
|
||||
value_type: vt,
|
||||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a parameter for a special-purpose register.
|
||||
pub fn special_reg(vt: Type, purpose: ArgumentPurpose, regunit: RegUnit) -> Self {
|
||||
Self {
|
||||
value_type: vt,
|
||||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: ArgumentLoc::Reg(regunit),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert `self` to a parameter with the `uext` flag set.
|
||||
pub fn uext(self) -> Self {
|
||||
debug_assert!(self.value_type.is_int(), "uext on {} arg", self.value_type);
|
||||
Self {
|
||||
extension: ArgumentExtension::Uext,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert `self` to a parameter type with the `sext` flag set.
|
||||
pub fn sext(self) -> Self {
|
||||
debug_assert!(self.value_type.is_int(), "sext on {} arg", self.value_type);
|
||||
Self {
|
||||
extension: ArgumentExtension::Sext,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display `self` with correct register names.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayAbiParam<'a> {
|
||||
DisplayAbiParam(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type capable of displaying a `AbiParam` with correct register names.
|
||||
pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayAbiParam<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0.value_type)?;
|
||||
match self.0.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => write!(f, " uext")?,
|
||||
ArgumentExtension::Sext => write!(f, " sext")?,
|
||||
}
|
||||
if self.0.purpose != ArgumentPurpose::Normal {
|
||||
write!(f, " {}", self.0.purpose)?;
|
||||
}
|
||||
|
||||
if self.0.location.is_assigned() {
|
||||
write!(f, " [{}]", self.0.location.display(self.1))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for AbiParam {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.display(None).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Function argument extension options.
///
/// On some architectures, small integer function arguments are extended to the width of a
/// general-purpose register.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum ArgumentExtension {
    /// No extension, high bits are indeterminate.
    None,
    /// Unsigned extension: high bits in register are 0.
    Uext,
    /// Signed extension: high bits in register replicate sign bit.
    Sext,
}
|
||||
|
||||
/// The special purpose of a function argument.
///
/// Function arguments and return values are used to pass user program values between functions,
/// but they are also used to represent special registers with significance to the ABI such as
/// frame pointers and callee-saved registers.
///
/// The argument purpose is used to indicate any special meaning of an argument or return value.
///
/// NOTE: the `Display` impl indexes `PURPOSE_NAMES` by discriminant, so variant
/// order here must stay in sync with that array.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum ArgumentPurpose {
    /// A normal user program value passed to or from a function.
    Normal,

    /// Struct return pointer.
    ///
    /// When a function needs to return more data than will fit in registers, the caller passes a
    /// pointer to a memory location where the return value can be written. In some ABIs, this
    /// struct return pointer is passed in a specific register.
    ///
    /// This argument kind can also appear as a return value for ABIs that require a function with
    /// a `StructReturn` pointer argument to also return that pointer in a register.
    StructReturn,

    /// The link register.
    ///
    /// Most RISC architectures implement calls by saving the return address in a designated
    /// register rather than pushing it on the stack. This is represented with a `Link` argument.
    ///
    /// Similarly, some return instructions expect the return address in a register represented as
    /// a `Link` return value.
    Link,

    /// The frame pointer.
    ///
    /// This indicates the frame pointer register which has a special meaning in some ABIs.
    ///
    /// The frame pointer appears as an argument and as a return value since it is a callee-saved
    /// register.
    FramePointer,

    /// A callee-saved register.
    ///
    /// Some calling conventions have registers that must be saved by the callee. These registers
    /// are represented as `CalleeSaved` arguments and return values.
    CalleeSaved,

    /// A VM context pointer.
    ///
    /// This is a pointer to a context struct containing details about the current sandbox. It is
    /// used as a base pointer for `vmctx` global variables.
    VMContext,

    /// A signature identifier.
    ///
    /// This is a special-purpose argument used to identify the calling convention expected by the
    /// caller in an indirect call. The callee can verify that the expected signature ID matches.
    SignatureId,
}

/// Text format names of the `ArgumentPurpose` variants, indexed by discriminant.
static PURPOSE_NAMES: [&str; 7] = ["normal", "sret", "link", "fp", "csr", "vmctx", "sigid"];
|
||||
|
||||
impl fmt::Display for ArgumentPurpose {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(PURPOSE_NAMES[*self as usize])
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for ArgumentPurpose {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<ArgumentPurpose, ()> {
|
||||
match s {
|
||||
"normal" => Ok(ArgumentPurpose::Normal),
|
||||
"sret" => Ok(ArgumentPurpose::StructReturn),
|
||||
"link" => Ok(ArgumentPurpose::Link),
|
||||
"fp" => Ok(ArgumentPurpose::FramePointer),
|
||||
"csr" => Ok(ArgumentPurpose::CalleeSaved),
|
||||
"vmctx" => Ok(ArgumentPurpose::VMContext),
|
||||
"sigid" => Ok(ArgumentPurpose::SignatureId),
|
||||
_ => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An external function.
|
||||
///
|
||||
/// Information about a function that can be called directly with a direct `call` instruction.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ExtFuncData {
|
||||
/// Name of the external function.
|
||||
pub name: ExternalName,
|
||||
/// Call signature of function.
|
||||
pub signature: SigRef,
|
||||
/// Will this function be defined nearby, such that it will always be a certain distance away,
|
||||
/// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
|
||||
/// symbols meant to be preemptible cannot be considered colocated.
|
||||
pub colocated: bool,
|
||||
}
|
||||
|
||||
impl fmt::Display for ExtFuncData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.colocated {
|
||||
write!(f, "colocated ")?;
|
||||
}
|
||||
write!(f, "{} {}", self.name, self.signature)
|
||||
}
|
||||
}
|
||||
|
||||
/// A calling convention.
///
/// A function's calling convention determines exactly how arguments and return values are passed,
/// and how stack frames are managed. Since all of these details depend on both the instruction set
/// architecture and possibly the operating system, a function's calling convention is only fully
/// determined by a `(TargetIsa, CallConv)` tuple.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum CallConv {
    /// The System V-style calling convention.
    ///
    /// This is the System V-style calling convention that a C compiler would
    /// use on many platforms.
    SystemV,

    /// A JIT-compiled WebAssembly function in the SpiderMonkey VM.
    SpiderWASM,
}

impl fmt::Display for CallConv {
    /// Display the textual name used in the IR format.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match *self {
            CallConv::SystemV => "system_v",
            CallConv::SpiderWASM => "spiderwasm",
        };
        f.write_str(name)
    }
}

impl FromStr for CallConv {
    type Err = ();

    /// Parse a calling convention from its text name; unknown names yield `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "system_v" => Ok(CallConv::SystemV),
            "spiderwasm" => Ok(CallConv::SpiderWASM),
            _ => Err(()),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use ir::types::{B8, F32, I32};
    use std::string::ToString;

    #[test]
    fn argument_type() {
        let t = AbiParam::new(I32);
        assert_eq!(t.to_string(), "i32");
        let mut t = t.uext();
        assert_eq!(t.to_string(), "i32 uext");
        assert_eq!(t.sext().to_string(), "i32 sext");
        t.purpose = ArgumentPurpose::StructReturn;
        assert_eq!(t.to_string(), "i32 uext sret");
    }

    #[test]
    fn argument_purpose() {
        // Every variant must round-trip through its text name. This list must be
        // in the same order as `PURPOSE_NAMES`; the length check guards against
        // a new variant being added to only one of the two (previously
        // `SignatureId` was missing here, so `zip` silently skipped it).
        let all_purpose = [
            ArgumentPurpose::Normal,
            ArgumentPurpose::StructReturn,
            ArgumentPurpose::Link,
            ArgumentPurpose::FramePointer,
            ArgumentPurpose::CalleeSaved,
            ArgumentPurpose::VMContext,
            ArgumentPurpose::SignatureId,
        ];
        assert_eq!(all_purpose.len(), PURPOSE_NAMES.len());
        for (&e, &n) in all_purpose.iter().zip(PURPOSE_NAMES.iter()) {
            assert_eq!(e.to_string(), n);
            assert_eq!(Ok(e), n.parse());
        }
    }

    #[test]
    fn call_conv() {
        for &cc in &[CallConv::SystemV, CallConv::SpiderWASM] {
            assert_eq!(Ok(cc), cc.to_string().parse())
        }
    }

    #[test]
    fn signatures() {
        let mut sig = Signature::new(CallConv::SpiderWASM);
        assert_eq!(sig.to_string(), "() spiderwasm");
        sig.params.push(AbiParam::new(I32));
        assert_eq!(sig.to_string(), "(i32) spiderwasm");
        sig.returns.push(AbiParam::new(F32));
        assert_eq!(sig.to_string(), "(i32) -> f32 spiderwasm");
        sig.params.push(AbiParam::new(I32.by(4).unwrap()));
        assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 spiderwasm");
        sig.returns.push(AbiParam::new(B8));
        assert_eq!(sig.to_string(), "(i32, i32x4) -> f32, b8 spiderwasm");

        // Test the offset computation algorithm.
        assert_eq!(sig.argument_bytes, None);
        sig.params[1].location = ArgumentLoc::Stack(8);
        sig.compute_argument_bytes();
        // An `i32x4` at offset 8 requires a 24-byte argument array.
        assert_eq!(sig.argument_bytes, Some(24));
        // Order does not matter.
        sig.params[0].location = ArgumentLoc::Stack(24);
        sig.compute_argument_bytes();
        assert_eq!(sig.argument_bytes, Some(28));

        // Writing ABI-annotated signatures.
        assert_eq!(
            sig.to_string(),
            "(i32 [24], i32x4 [8]) -> f32, b8 spiderwasm"
        );
    }
}
|
||||
165
lib/codegen/src/ir/extname.rs
Normal file
165
lib/codegen/src/ir/extname.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
//! External names.
|
||||
//!
|
||||
//! These are identifiers for declaring entities defined outside the current
|
||||
//! function. The name of an external declaration doesn't have any meaning to
|
||||
//! Cretonne, which compiles functions independently.
|
||||
|
||||
use ir::LibCall;
|
||||
use std::cmp;
|
||||
use std::fmt::{self, Write};
|
||||
use std::str::FromStr;
|
||||
|
||||
// Maximum number of ascii bytes stored inline in a `TestCase` name.
const TESTCASE_NAME_LENGTH: usize = 16;

/// The name of an external is either a reference to a user-defined symbol
/// table, or a short sequence of ascii bytes so that test cases do not have
/// to keep track of a symbol table.
///
/// External names are primarily used as keys by code using Cretonne to map
/// from a `cretonne_codegen::ir::FuncRef` or similar to additional associated
/// data.
///
/// External names can also serve as a primitive testing and debugging tool.
/// In particular, many `.cton` test files use function names to identify
/// functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExternalName {
    /// A name in a user-defined symbol table. Cretonne does not interpret
    /// these numbers in any way.
    User {
        /// Arbitrary.
        namespace: u32,
        /// Arbitrary.
        index: u32,
    },
    /// A test case function name of up to 16 ascii characters. This is
    /// not intended to be used outside test cases.
    TestCase {
        /// How many of the bytes in `ascii` are valid?
        length: u8,
        /// Ascii bytes of the name.
        ascii: [u8; TESTCASE_NAME_LENGTH],
    },
    /// A well-known runtime library function.
    LibCall(LibCall),
}
|
||||
|
||||
impl ExternalName {
|
||||
/// Creates a new external name from a sequence of bytes. Caller is expected
|
||||
/// to guarantee bytes are only ascii alphanumeric or `_`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use cretonne_codegen::ir::ExternalName;
|
||||
/// // Create `ExternalName` from a string.
|
||||
/// let name = ExternalName::testcase("hello");
|
||||
/// assert_eq!(name.to_string(), "%hello");
|
||||
/// ```
|
||||
pub fn testcase<T: AsRef<[u8]>>(v: T) -> ExternalName {
|
||||
let vec = v.as_ref();
|
||||
let len = cmp::min(vec.len(), TESTCASE_NAME_LENGTH);
|
||||
let mut bytes = [0u8; TESTCASE_NAME_LENGTH];
|
||||
bytes[0..len].copy_from_slice(&vec[0..len]);
|
||||
|
||||
ExternalName::TestCase {
|
||||
length: len as u8,
|
||||
ascii: bytes,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new external name from user-provided integer indicies.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rust
|
||||
/// # use cretonne_codegen::ir::ExternalName;
|
||||
/// // Create `ExternalName` from integer indicies
|
||||
/// let name = ExternalName::user(123, 456);
|
||||
/// assert_eq!(name.to_string(), "u123:456");
|
||||
/// ```
|
||||
pub fn user(namespace: u32, index: u32) -> ExternalName {
|
||||
ExternalName::User {
|
||||
namespace: namespace,
|
||||
index: index,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ExternalName {
|
||||
fn default() -> ExternalName {
|
||||
ExternalName::user(0, 0)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ExternalName {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
ExternalName::User { namespace, index } => write!(f, "u{}:{}", namespace, index),
|
||||
ExternalName::TestCase { length, ascii } => {
|
||||
f.write_char('%')?;
|
||||
for byte in ascii.iter().take(length as usize) {
|
||||
f.write_char(*byte as char)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
ExternalName::LibCall(lc) => write!(f, "%{}", lc),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for ExternalName {
|
||||
type Err = ();
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
// Try to parse as a libcall name, otherwise it's a test case.
|
||||
match s.parse() {
|
||||
Ok(lc) => Ok(ExternalName::LibCall(lc)),
|
||||
Err(_) => Ok(ExternalName::testcase(s.as_bytes())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::ExternalName;
    use ir::LibCall;
    use std::string::ToString;

    #[test]
    fn display_testcase() {
        assert_eq!(ExternalName::testcase("").to_string(), "%");
        assert_eq!(ExternalName::testcase("x").to_string(), "%x");
        assert_eq!(ExternalName::testcase("x_1").to_string(), "%x_1");
        assert_eq!(
            ExternalName::testcase("longname12345678").to_string(),
            "%longname12345678"
        );
        // Constructor will silently drop bytes beyond the 16th
        assert_eq!(
            ExternalName::testcase("longname123456789").to_string(),
            "%longname12345678"
        );
    }

    #[test]
    fn display_user() {
        assert_eq!(ExternalName::user(0, 0).to_string(), "u0:0");
        assert_eq!(ExternalName::user(1, 1).to_string(), "u1:1");
        // Extreme values must format without overflow or truncation.
        assert_eq!(
            ExternalName::user(::std::u32::MAX, ::std::u32::MAX).to_string(),
            "u4294967295:4294967295"
        );
    }

    #[test]
    fn parsing() {
        // Known libcall names parse to `LibCall` and display with a `%` prefix.
        assert_eq!(
            "FloorF32".parse(),
            Ok(ExternalName::LibCall(LibCall::FloorF32))
        );
        assert_eq!(
            ExternalName::LibCall(LibCall::FloorF32).to_string(),
            "%FloorF32"
        );
    }
}
|
||||
232
lib/codegen/src/ir/function.rs
Normal file
232
lib/codegen/src/ir/function.rs
Normal file
@@ -0,0 +1,232 @@
|
||||
//! Intermediate representation of a function.
|
||||
//!
|
||||
//! The `Function` struct defined in this module owns all of its extended basic blocks and
|
||||
//! instructions.
|
||||
|
||||
use binemit::CodeOffset;
|
||||
use entity::{EntityMap, PrimaryMap};
|
||||
use ir;
|
||||
use ir::{CallConv, DataFlowGraph, ExternalName, Layout, Signature};
|
||||
use ir::{Ebb, ExtFuncData, FuncRef, GlobalVar, GlobalVarData, Heap, HeapData, JumpTable,
|
||||
JumpTableData, SigRef, StackSlot, StackSlotData};
|
||||
use ir::{EbbOffsets, InstEncodings, JumpTables, SourceLocs, StackSlots, ValueLocations};
|
||||
use isa::{EncInfo, Legalize, TargetIsa, Encoding};
|
||||
use std::fmt;
|
||||
use write::write_function;
|
||||
|
||||
/// A function.
///
/// Functions can be cloned, but it is not a very fast operation.
/// The clone will have all the same entity numbers as the original.
#[derive(Clone)]
pub struct Function {
    /// Name of this function. Mostly used by `.cton` files.
    pub name: ExternalName,

    /// Signature of this function.
    pub signature: Signature,

    /// Stack slots allocated in this function.
    pub stack_slots: StackSlots,

    /// Global variables referenced.
    pub global_vars: PrimaryMap<ir::GlobalVar, ir::GlobalVarData>,

    /// Heaps referenced.
    pub heaps: PrimaryMap<ir::Heap, ir::HeapData>,

    /// Jump tables used in this function.
    pub jump_tables: JumpTables,

    /// Data flow graph containing the primary definition of all instructions, EBBs and values.
    pub dfg: DataFlowGraph,

    /// Layout of EBBs and instructions in the function body.
    pub layout: Layout,

    /// Encoding recipe and bits for the legal instructions.
    /// Illegal instructions have the `Encoding::default()` value.
    pub encodings: InstEncodings,

    /// Location assigned to every value.
    pub locations: ValueLocations,

    /// Code offsets of the EBB headers.
    ///
    /// This information is only transiently available after the `binemit::relax_branches` function
    /// computes it, and it can easily be recomputed by calling that function. It is not included
    /// in the textual IR format.
    pub offsets: EbbOffsets,

    /// Source locations.
    ///
    /// Track the original source location for each instruction. The source locations are not
    /// interpreted by Cretonne, only preserved.
    pub srclocs: SourceLocs,
}
|
||||
|
||||
impl Function {
|
||||
/// Create a function with the given name and signature.
|
||||
pub fn with_name_signature(name: ExternalName, sig: Signature) -> Self {
|
||||
Self {
|
||||
name,
|
||||
signature: sig,
|
||||
stack_slots: StackSlots::new(),
|
||||
global_vars: PrimaryMap::new(),
|
||||
heaps: PrimaryMap::new(),
|
||||
jump_tables: PrimaryMap::new(),
|
||||
dfg: DataFlowGraph::new(),
|
||||
layout: Layout::new(),
|
||||
encodings: EntityMap::new(),
|
||||
locations: EntityMap::new(),
|
||||
offsets: EntityMap::new(),
|
||||
srclocs: EntityMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear all data structures in this function.
|
||||
pub fn clear(&mut self) {
|
||||
self.signature.clear(ir::CallConv::SystemV);
|
||||
self.stack_slots.clear();
|
||||
self.global_vars.clear();
|
||||
self.heaps.clear();
|
||||
self.jump_tables.clear();
|
||||
self.dfg.clear();
|
||||
self.layout.clear();
|
||||
self.encodings.clear();
|
||||
self.locations.clear();
|
||||
self.offsets.clear();
|
||||
self.srclocs.clear();
|
||||
}
|
||||
|
||||
/// Create a new empty, anonymous function with a SystemV calling convention.
|
||||
pub fn new() -> Self {
|
||||
Self::with_name_signature(ExternalName::default(), Signature::new(CallConv::SystemV))
|
||||
}
|
||||
|
||||
/// Creates a jump table in the function, to be used by `br_table` instructions.
|
||||
pub fn create_jump_table(&mut self, data: JumpTableData) -> JumpTable {
|
||||
self.jump_tables.push(data)
|
||||
}
|
||||
|
||||
/// Inserts an entry in a previously declared jump table.
|
||||
pub fn insert_jump_table_entry(&mut self, jt: JumpTable, index: usize, ebb: Ebb) {
|
||||
self.jump_tables[jt].set_entry(index, ebb);
|
||||
}
|
||||
|
||||
/// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and
|
||||
/// `stack_addr` instructions.
|
||||
pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot {
|
||||
self.stack_slots.push(data)
|
||||
}
|
||||
|
||||
/// Adds a signature which can later be used to declare an external function import.
|
||||
pub fn import_signature(&mut self, signature: Signature) -> SigRef {
|
||||
self.dfg.signatures.push(signature)
|
||||
}
|
||||
|
||||
/// Declare an external function import.
|
||||
pub fn import_function(&mut self, data: ExtFuncData) -> FuncRef {
|
||||
self.dfg.ext_funcs.push(data)
|
||||
}
|
||||
|
||||
/// Declares a global variable accessible to the function.
|
||||
pub fn create_global_var(&mut self, data: GlobalVarData) -> GlobalVar {
|
||||
self.global_vars.push(data)
|
||||
}
|
||||
|
||||
/// Declares a heap accessible to the function.
|
||||
pub fn create_heap(&mut self, data: HeapData) -> Heap {
|
||||
self.heaps.push(data)
|
||||
}
|
||||
|
||||
/// Return an object that can display this function with correct ISA-specific annotations.
|
||||
pub fn display<'a, I: Into<Option<&'a TargetIsa>>>(&'a self, isa: I) -> DisplayFunction<'a> {
|
||||
DisplayFunction(self, isa.into())
|
||||
}
|
||||
|
||||
/// Find a presumed unique special-purpose function parameter value.
|
||||
///
|
||||
/// Returns the value of the last `purpose` parameter, or `None` if no such parameter exists.
|
||||
pub fn special_param(&self, purpose: ir::ArgumentPurpose) -> Option<ir::Value> {
|
||||
let entry = self.layout.entry_block().expect("Function is empty");
|
||||
self.signature.special_param_index(purpose).map(|i| {
|
||||
self.dfg.ebb_params(entry)[i]
|
||||
})
|
||||
}
|
||||
|
||||
/// Get an iterator over the instructions in `ebb`, including offsets and encoded instruction
|
||||
/// sizes.
|
||||
///
|
||||
/// The iterator returns `(offset, inst, size)` tuples, where `offset` if the offset in bytes
|
||||
/// from the beginning of the function to the instruction, and `size` is the size of the
|
||||
/// instruction in bytes, or 0 for unencoded instructions.
|
||||
///
|
||||
/// This function can only be used after the code layout has been computed by the
|
||||
/// `binemit::relax_branches()` function.
|
||||
pub fn inst_offsets<'a>(&'a self, ebb: Ebb, encinfo: &EncInfo) -> InstOffsetIter<'a> {
|
||||
assert!(
|
||||
!self.offsets.is_empty(),
|
||||
"Code layout must be computed first"
|
||||
);
|
||||
InstOffsetIter {
|
||||
encinfo: encinfo.clone(),
|
||||
encodings: &self.encodings,
|
||||
offset: self.offsets[ebb],
|
||||
iter: self.layout.ebb_insts(ebb),
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper around `encode` which assigns `inst` the resulting encoding.
|
||||
pub fn update_encoding(&mut self, inst: ir::Inst, isa: &TargetIsa) -> Result<(), Legalize> {
|
||||
self.encode(inst, isa).map(|e| self.encodings[inst] = e)
|
||||
}
|
||||
|
||||
/// Wrapper around `TargetIsa::encode` for encoding an existing instruction
|
||||
/// in the `Function`.
|
||||
pub fn encode(&self, inst: ir::Inst, isa: &TargetIsa) -> Result<Encoding, Legalize> {
|
||||
isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type capable of displaying a `Function` with correct ISA annotations.
///
/// Created by `Function::display`; the second field is the optional ISA.
pub struct DisplayFunction<'a>(&'a Function, Option<&'a TargetIsa>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayFunction<'a> {
    /// Delegate to the plain-text writer, forwarding the optional ISA for
    /// ISA-specific annotations.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write_function(fmt, self.0, self.1)
    }
}
|
||||
|
||||
impl fmt::Display for Function {
    /// Print the function without ISA-specific annotations (no ISA context).
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write_function(fmt, self, None)
    }
}
|
||||
|
||||
impl fmt::Debug for Function {
    /// Debug output is identical to `Display`: the textual IR form.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write_function(fmt, self, None)
    }
}
|
||||
|
||||
/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`.
pub struct InstOffsetIter<'a> {
    // Cloned encoding info used to look up the byte size of each encoding.
    encinfo: EncInfo,
    // Per-instruction encodings of the function being iterated.
    encodings: &'a InstEncodings,
    // Running byte offset of the next instruction from the function start.
    offset: CodeOffset,
    // Underlying iterator over the EBB's instructions in layout order.
    iter: ir::layout::Insts<'a>,
}
|
||||
|
||||
impl<'a> Iterator for InstOffsetIter<'a> {
|
||||
type Item = (CodeOffset, ir::Inst, CodeOffset);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|inst| {
|
||||
let size = self.encinfo.bytes(self.encodings[inst]);
|
||||
let offset = self.offset;
|
||||
self.offset += size;
|
||||
(offset, inst, size)
|
||||
})
|
||||
}
|
||||
}
|
||||
70
lib/codegen/src/ir/globalvar.rs
Normal file
70
lib/codegen/src/ir/globalvar.rs
Normal file
@@ -0,0 +1,70 @@
|
||||
//! Global variables.
|
||||
|
||||
use ir::immediates::Offset32;
|
||||
use ir::{ExternalName, GlobalVar};
|
||||
use std::fmt;
|
||||
|
||||
/// Information about a global variable declaration.
#[derive(Clone)]
pub enum GlobalVarData {
    /// Variable is part of the VM context struct, its address is a constant offset from the VM
    /// context pointer.
    VMContext {
        /// Offset from the `vmctx` pointer to this global.
        offset: Offset32,
    },

    /// Variable is part of a struct pointed to by another global variable.
    ///
    /// The `base` global variable is assumed to contain a pointer to a struct. This global
    /// variable lives at an offset into the struct. The memory must be accessible, and
    /// naturally aligned to hold a pointer value.
    Deref {
        /// The base pointer global variable.
        base: GlobalVar,

        /// Byte offset to be added to the pointer loaded from `base`.
        offset: Offset32,
    },

    /// Variable is at an address identified by a symbolic name. Cretonne itself
    /// does not interpret this name; it's used by embedders to link with other
    /// data structures.
    Sym {
        /// The symbolic name.
        name: ExternalName,

        /// Will this variable be defined nearby, such that it will always be a certain distance
        /// away, after linking? If so, references to it can avoid going through a GOT. Note that
        /// symbols meant to be preemptible cannot be colocated.
        colocated: bool,
    },
}
|
||||
|
||||
impl GlobalVarData {
|
||||
/// Assume that `self` is an `GlobalVarData::Sym` and return its name.
|
||||
pub fn symbol_name(&self) -> &ExternalName {
|
||||
match *self {
|
||||
GlobalVarData::Sym { ref name, .. } => name,
|
||||
_ => panic!("only symbols have names"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for GlobalVarData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
GlobalVarData::VMContext { offset } => write!(f, "vmctx{}", offset),
|
||||
GlobalVarData::Deref { base, offset } => write!(f, "deref({}){}", base, offset),
|
||||
GlobalVarData::Sym {
|
||||
ref name,
|
||||
colocated,
|
||||
} => {
|
||||
if colocated {
|
||||
write!(f, "colocated ")?;
|
||||
}
|
||||
write!(f, "globalsym {}", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
74
lib/codegen/src/ir/heap.rs
Normal file
74
lib/codegen/src/ir/heap.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
//! Heaps.
|
||||
|
||||
use ir::GlobalVar;
|
||||
use ir::immediates::Imm64;
|
||||
use std::fmt;
|
||||
|
||||
/// Information about a heap declaration.
#[derive(Clone)]
pub struct HeapData {
    /// Method for determining the heap base address.
    pub base: HeapBase,

    /// Guaranteed minimum heap size in bytes. Heap accesses before `min_size` don't need bounds
    /// checking.
    pub min_size: Imm64,

    /// Size in bytes of the guard pages following the heap.
    pub guard_size: Imm64,

    /// Heap style, with additional style-specific info.
    pub style: HeapStyle,
}
|
||||
|
||||
/// Method for determining the base address of a heap.
#[derive(Clone)]
pub enum HeapBase {
    /// The heap base lives in a reserved register.
    ///
    /// This feature is not yet implemented.
    ReservedReg,

    /// The heap base is in a global variable. The variable must be accessible and naturally
    /// aligned for a pointer.
    GlobalVar(GlobalVar),
}
|
||||
|
||||
/// Style of heap including style-specific information.
#[derive(Clone)]
pub enum HeapStyle {
    /// A dynamic heap can be relocated to a different base address when it is grown.
    Dynamic {
        /// Global variable holding the current bound of the heap in bytes. It is
        /// required to be accessible and naturally aligned for a pointer-sized integer.
        bound_gv: GlobalVar,
    },

    /// A static heap has a fixed base address and a number of not-yet-allocated pages before the
    /// guard pages.
    Static {
        /// Heap bound in bytes. The guard pages are allocated after the bound.
        bound: Imm64,
    },
}
|
||||
|
||||
impl fmt::Display for HeapData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(match self.style {
|
||||
HeapStyle::Dynamic { .. } => "dynamic",
|
||||
HeapStyle::Static { .. } => "static",
|
||||
})?;
|
||||
|
||||
match self.base {
|
||||
HeapBase::ReservedReg => write!(f, " reserved_reg")?,
|
||||
HeapBase::GlobalVar(gv) => write!(f, " {}", gv)?,
|
||||
}
|
||||
|
||||
write!(f, ", min {}", self.min_size)?;
|
||||
match self.style {
|
||||
HeapStyle::Dynamic { bound_gv } => write!(f, ", bound {}", bound_gv)?,
|
||||
HeapStyle::Static { bound } => write!(f, ", bound {}", bound)?,
|
||||
}
|
||||
write!(f, ", guard {}", self.guard_size)
|
||||
}
|
||||
}
|
||||
1031
lib/codegen/src/ir/immediates.rs
Normal file
1031
lib/codegen/src/ir/immediates.rs
Normal file
File diff suppressed because it is too large
Load Diff
680
lib/codegen/src/ir/instructions.rs
Normal file
680
lib/codegen/src/ir/instructions.rs
Normal file
@@ -0,0 +1,680 @@
|
||||
//! Instruction formats and opcodes.
|
||||
//!
|
||||
//! The `instructions` module contains definitions for instruction formats, opcodes, and the
|
||||
//! in-memory representation of IR instructions.
|
||||
//!
|
||||
//! A large part of this module is auto-generated from the instruction descriptions in the meta
|
||||
//! directory.
|
||||
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::str::FromStr;
|
||||
use std::vec::Vec;
|
||||
|
||||
use ir;
|
||||
use ir::types;
|
||||
use ir::{Ebb, FuncRef, JumpTable, SigRef, Type, Value};
|
||||
use isa;
|
||||
|
||||
use bitset::BitSet;
|
||||
use entity;
|
||||
use ref_slice::{ref_slice, ref_slice_mut};
|
||||
|
||||
/// Some instructions use an external list of argument values because there is not enough space in
|
||||
/// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in
|
||||
/// `dfg.value_lists`.
|
||||
pub type ValueList = entity::EntityList<Value>;
|
||||
|
||||
/// Memory pool for holding value lists. See `ValueList`.
|
||||
pub type ValueListPool = entity::ListPool<Value>;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_instr.py`. This file contains:
|
||||
//
|
||||
// - The `pub enum InstructionFormat` enum with all the instruction formats.
|
||||
// - The `pub enum InstructionData` enum with all the instruction data fields.
|
||||
// - The `pub enum Opcode` definition with all known opcodes,
|
||||
// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table.
|
||||
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
|
||||
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
|
||||
//
|
||||
// For value type constraints:
|
||||
//
|
||||
// - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table.
|
||||
// - The `const TYPE_SETS : [ValueTypeSet; N]` table.
|
||||
// - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table.
|
||||
//
|
||||
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
|
||||
|
||||
impl Display for Opcode {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
write!(f, "{}", opcode_name(*self))
|
||||
}
|
||||
}
|
||||
|
||||
impl Opcode {
    /// Get the instruction format for this opcode.
    ///
    /// NOTE(review): the `- 1` suggests discriminant 0 is reserved for an
    /// invalid opcode in the generated tables — confirm in gen_instr.py.
    pub fn format(self) -> InstructionFormat {
        OPCODE_FORMAT[self as usize - 1]
    }

    /// Get the constraint descriptor for this opcode.
    /// Panic if this is called on `NotAnOpcode`.
    pub fn constraints(self) -> OpcodeConstraints {
        OPCODE_CONSTRAINTS[self as usize - 1]
    }
}
|
||||
|
||||
// This trait really belongs in lib/reader where it is used by the `.cton` file parser, but since
// it critically depends on the `opcode_name()` function which is needed here anyway, it lives in
// this module. This also saves us from running the build script twice to generate code for the two
// separate crates.
impl FromStr for Opcode {
    type Err = &'static str;

    /// Parse an Opcode name from a string.
    fn from_str(s: &str) -> Result<Opcode, &'static str> {
        use constant_hash::{probe, simple_hash, Table};

        // Adapt the generated `OPCODE_HASH_TABLE` slice to the `Table` trait
        // expected by `probe()`.
        impl<'a> Table<&'a str> for [Option<Opcode>] {
            fn len(&self) -> usize {
                // Resolves to the inherent slice `len`, not this trait method,
                // so this is not a recursive call.
                self.len()
            }

            fn key(&self, idx: usize) -> Option<&'a str> {
                self[idx].map(opcode_name)
            }
        }

        match probe::<&str, [Option<Opcode>]>(&OPCODE_HASH_TABLE, s, simple_hash(s)) {
            Err(_) => Err("Unknown opcode"),
            // We unwrap here because probe() should have ensured that the entry
            // at this index is not None.
            Ok(i) => Ok(OPCODE_HASH_TABLE[i].unwrap()),
        }
    }
}
|
||||
|
||||
/// A variable list of `Value` operands used for function call arguments and passing arguments to
/// basic blocks.
#[derive(Clone, Debug)]
pub struct VariableArgs(Vec<Value>);
|
||||
|
||||
impl VariableArgs {
|
||||
/// Create an empty argument list.
|
||||
pub fn new() -> Self {
|
||||
VariableArgs(Vec::new())
|
||||
}
|
||||
|
||||
/// Add an argument to the end.
|
||||
pub fn push(&mut self, v: Value) {
|
||||
self.0.push(v)
|
||||
}
|
||||
|
||||
/// Check if the list is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
/// Convert this to a value list in `pool` with `fixed` prepended.
|
||||
pub fn into_value_list(self, fixed: &[Value], pool: &mut ValueListPool) -> ValueList {
|
||||
let mut vlist = ValueList::default();
|
||||
vlist.extend(fixed.iter().cloned(), pool);
|
||||
vlist.extend(self.0, pool);
|
||||
vlist
|
||||
}
|
||||
}
|
||||
|
||||
// Coerce `VariableArgs` into a `&[Value]` slice.
|
||||
impl Deref for VariableArgs {
|
||||
type Target = [Value];
|
||||
|
||||
fn deref(&self) -> &[Value] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for VariableArgs {
|
||||
fn deref_mut(&mut self) -> &mut [Value] {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for VariableArgs {
|
||||
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
|
||||
for (i, val) in self.0.iter().enumerate() {
|
||||
if i == 0 {
|
||||
write!(fmt, "{}", val)?;
|
||||
} else {
|
||||
write!(fmt, ", {}", val)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for VariableArgs {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Analyzing an instruction.
///
/// Avoid large matches on instruction formats by using the methods defined here to examine
/// instructions.
impl InstructionData {
    /// Return information about the destination of a branch or jump instruction.
    ///
    /// Any instruction that can transfer control to another EBB reveals its possible destinations
    /// here.
    pub fn analyze_branch<'a>(&'a self, pool: &'a ValueListPool) -> BranchInfo<'a> {
        match *self {
            // A jump passes its entire value list to the destination.
            InstructionData::Jump {
                destination,
                ref args,
                ..
            } => BranchInfo::SingleDest(destination, args.as_slice(pool)),
            // NOTE(review): `[1..]`/`[2..]` below skip fixed operands stored at
            // the front of the value list — presumably one tested value for
            // `Branch`/`BranchInt`/`BranchFloat` and two compared values for
            // `BranchIcmp`; confirm against the instruction definitions.
            InstructionData::BranchInt {
                destination,
                ref args,
                ..
            } |
            InstructionData::BranchFloat {
                destination,
                ref args,
                ..
            } |
            InstructionData::Branch {
                destination,
                ref args,
                ..
            } => BranchInfo::SingleDest(destination, &args.as_slice(pool)[1..]),
            InstructionData::BranchIcmp {
                destination,
                ref args,
                ..
            } => BranchInfo::SingleDest(destination, &args.as_slice(pool)[2..]),
            InstructionData::BranchTable { table, .. } => BranchInfo::Table(table),
            _ => {
                // Any other format must not be a branch opcode.
                debug_assert!(!self.opcode().is_branch());
                BranchInfo::NotABranch
            }
        }
    }

    /// Get the single destination of this branch instruction, if it is a single destination
    /// branch or jump.
    ///
    /// Multi-destination branches like `br_table` return `None`.
    pub fn branch_destination(&self) -> Option<Ebb> {
        match *self {
            InstructionData::Jump { destination, .. } |
            InstructionData::Branch { destination, .. } |
            InstructionData::BranchInt { destination, .. } |
            InstructionData::BranchFloat { destination, .. } |
            InstructionData::BranchIcmp { destination, .. } => Some(destination),
            InstructionData::BranchTable { .. } => None,
            _ => {
                debug_assert!(!self.opcode().is_branch());
                None
            }
        }
    }

    /// Get a mutable reference to the single destination of this branch instruction, if it is a
    /// single destination branch or jump.
    ///
    /// Multi-destination branches like `br_table` return `None`.
    pub fn branch_destination_mut(&mut self) -> Option<&mut Ebb> {
        match *self {
            InstructionData::Jump { ref mut destination, .. } |
            InstructionData::Branch { ref mut destination, .. } |
            InstructionData::BranchInt { ref mut destination, .. } |
            InstructionData::BranchFloat { ref mut destination, .. } |
            InstructionData::BranchIcmp { ref mut destination, .. } => Some(destination),
            InstructionData::BranchTable { .. } => None,
            _ => {
                debug_assert!(!self.opcode().is_branch());
                None
            }
        }
    }

    /// Return information about a call instruction.
    ///
    /// Any instruction that can call another function reveals its call signature here.
    pub fn analyze_call<'a>(&'a self, pool: &'a ValueListPool) -> CallInfo<'a> {
        match *self {
            InstructionData::Call { func_ref, ref args, .. } => {
                CallInfo::Direct(func_ref, args.as_slice(pool))
            }
            // NOTE(review): `[1..]` presumably skips the callee pointer stored
            // first in the value list — confirm against the CallIndirect format.
            InstructionData::CallIndirect { sig_ref, ref args, .. } => {
                CallInfo::Indirect(sig_ref, &args.as_slice(pool)[1..])
            }
            _ => {
                debug_assert!(!self.opcode().is_call());
                CallInfo::NotACall
            }
        }
    }
}
|
||||
|
||||
/// Information about branch and jump instructions.
pub enum BranchInfo<'a> {
    /// This is not a branch or jump instruction.
    /// This instruction will not transfer control to another EBB in the function, but it may still
    /// affect control flow by returning or trapping.
    NotABranch,

    /// This is a branch or jump to a single destination EBB, possibly taking value arguments.
    SingleDest(Ebb, &'a [Value]),

    /// This is a jump table branch which can have many destination EBBs.
    Table(JumpTable),
}
|
||||
|
||||
/// Information about call instructions.
pub enum CallInfo<'a> {
    /// This is not a call instruction.
    NotACall,

    /// This is a direct call to an external function declared in the preamble. See
    /// `DataFlowGraph.ext_funcs`.
    Direct(FuncRef, &'a [Value]),

    /// This is an indirect call with the specified signature. See `DataFlowGraph.signatures`.
    Indirect(SigRef, &'a [Value]),
}
|
||||
|
||||
/// Value type constraints for a given opcode.
///
/// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and
/// results are not determined by the format. Every `Opcode` has an associated
/// `OpcodeConstraints` object that provides the missing details.
#[derive(Clone, Copy)]
pub struct OpcodeConstraints {
    /// Flags for this opcode encoded as a bit field:
    ///
    /// Bits 0-2:
    /// Number of fixed result values. This does not include `variable_args` results as are
    /// produced by call instructions.
    ///
    /// Bit 3:
    /// This opcode is polymorphic and the controlling type variable can be inferred from the
    /// designated input operand. This is the `typevar_operand` index given to the
    /// `InstructionFormat` meta language object. When this bit is not set, the controlling
    /// type variable must be the first output value instead.
    ///
    /// Bit 4:
    /// This opcode is polymorphic and the controlling type variable does *not* appear as the
    /// first result type.
    ///
    /// Bits 5-7:
    /// Number of fixed value arguments. The minimum required number of value operands.
    flags: u8,

    /// Permitted set of types for the controlling type variable as an index into `TYPE_SETS`.
    /// An out-of-range index means the opcode is not polymorphic (see `typeset_offset`).
    typeset_offset: u8,

    /// Offset into `OPERAND_CONSTRAINT` table of the descriptors for this opcode. The first
    /// `fixed_results()` entries describe the result constraints, then follows constraints for the
    /// fixed `Value` input operands. (`fixed_value_arguments()` of them).
    constraint_offset: u16,
}
|
||||
|
||||
impl OpcodeConstraints {
    /// Can the controlling type variable for this opcode be inferred from the designated value
    /// input operand?
    /// This also implies that this opcode is polymorphic.
    pub fn use_typevar_operand(self) -> bool {
        // Bit 3 of the flags field.
        (self.flags & 0x8) != 0
    }

    /// Is it necessary to look at the designated value input operand in order to determine the
    /// controlling type variable, or is it good enough to use the first return type?
    ///
    /// Most polymorphic instructions produce a single result with the type of the controlling type
    /// variable. A few polymorphic instructions either don't produce any results, or produce
    /// results with a fixed type. These instructions return `true`.
    pub fn requires_typevar_operand(self) -> bool {
        // Bit 4 of the flags field.
        (self.flags & 0x10) != 0
    }

    /// Get the number of *fixed* result values produced by this opcode.
    /// This does not include `variable_args` produced by calls.
    pub fn fixed_results(self) -> usize {
        // Bits 0-2 of the flags field.
        (self.flags & 0x7) as usize
    }

    /// Get the number of *fixed* input values required by this opcode.
    ///
    /// This does not include `variable_args` arguments on call and branch instructions.
    ///
    /// The number of fixed input values is usually implied by the instruction format, but
    /// instruction formats that use a `ValueList` put both fixed and variable arguments in the
    /// list. This method returns the *minimum* number of values required in the value list.
    pub fn fixed_value_arguments(self) -> usize {
        // Bits 5-7 of the flags field.
        ((self.flags >> 5) & 0x7) as usize
    }

    /// Get the offset into `TYPE_SETS` for the controlling type variable.
    /// Returns `None` if the instruction is not polymorphic.
    fn typeset_offset(self) -> Option<usize> {
        // An out-of-range offset encodes "not polymorphic".
        let offset = usize::from(self.typeset_offset);
        if offset < TYPE_SETS.len() {
            Some(offset)
        } else {
            None
        }
    }

    /// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin.
    fn constraint_offset(self) -> usize {
        self.constraint_offset as usize
    }

    /// Get the value type of result number `n`, having resolved the controlling type variable to
    /// `ctrl_type`.
    ///
    /// Panics if `n` is out of range or the result constraint is `Free`.
    pub fn result_type(self, n: usize, ctrl_type: Type) -> Type {
        debug_assert!(n < self.fixed_results(), "Invalid result index");
        if let ResolvedConstraint::Bound(t) =
            OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type)
        {
            t
        } else {
            panic!("Result constraints can't be free");
        }
    }

    /// Get the value type of input value number `n`, having resolved the controlling type variable
    /// to `ctrl_type`.
    ///
    /// Unlike results, it is possible for some input values to vary freely within a specific
    /// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant.
    pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint {
        debug_assert!(
            n < self.fixed_value_arguments(),
            "Invalid value argument index"
        );
        // Argument constraints follow the result constraints in the table.
        let offset = self.constraint_offset() + self.fixed_results();
        OPERAND_CONSTRAINTS[offset + n].resolve(ctrl_type)
    }

    /// Get the typeset of allowed types for the controlling type variable in a polymorphic
    /// instruction.
    pub fn ctrl_typeset(self) -> Option<ValueTypeSet> {
        self.typeset_offset().map(|offset| TYPE_SETS[offset])
    }

    /// Is this instruction polymorphic?
    pub fn is_polymorphic(self) -> bool {
        self.ctrl_typeset().is_some()
    }
}
|
||||
|
||||
// Compact bit sets indexed by log2 of a size (see `ValueTypeSet::contains`
// and `is_base_type` for how the indices are computed).
type BitSet8 = BitSet<u8>;
type BitSet16 = BitSet<u16>;

/// A value type set describes the permitted set of types for a type variable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ValueTypeSet {
    /// Allowed lane sizes
    /// (indexed by log2 of the lane count, per `contains`).
    pub lanes: BitSet16,
    /// Allowed int widths
    /// (indexed by log2 of the bit width, per `is_base_type`).
    pub ints: BitSet8,
    /// Allowed float widths
    pub floats: BitSet8,
    /// Allowed bool widths
    pub bools: BitSet8,
}
|
||||
|
||||
impl ValueTypeSet {
    /// Is `scalar` part of the base type set?
    ///
    /// Note that the base type set does not have to be included in the type set proper.
    fn is_base_type(&self, scalar: Type) -> bool {
        // The per-class bit sets are indexed by log2 of the scalar's bit width.
        let l2b = scalar.log2_lane_bits();
        if scalar.is_int() {
            self.ints.contains(l2b)
        } else if scalar.is_float() {
            self.floats.contains(l2b)
        } else if scalar.is_bool() {
            self.bools.contains(l2b)
        } else {
            false
        }
    }

    /// Does `typ` belong to this set?
    pub fn contains(&self, typ: Type) -> bool {
        // Both the lane count and the lane (scalar) type must be permitted.
        let l2l = typ.log2_lane_count();
        self.lanes.contains(l2l) && self.is_base_type(typ.lane_type())
    }

    /// Get an example member of this type set.
    ///
    /// This is used for error messages to avoid suggesting invalid types.
    pub fn example(&self) -> Type {
        // NOTE(review): `> 5` appears to test whether the set permits widths
        // beyond 32 bits (log2(32) == 5), in which case the 32-bit member of
        // that class is used as the example — confirm against the BitSet
        // encoding used by the meta generator.
        let t = if self.ints.max().unwrap_or(0) > 5 {
            types::I32
        } else if self.floats.max().unwrap_or(0) > 5 {
            types::F32
        } else if self.bools.max().unwrap_or(0) > 5 {
            types::B32
        } else {
            types::B1
        };
        // Panics if the lane set is empty or the resulting vector type is
        // invalid for `t` — callers are expected to pass well-formed sets.
        t.by(1 << self.lanes.min().unwrap()).unwrap()
    }
}
|
||||
|
||||
/// Operand constraints. This describes the value type constraints on a single `Value` operand.
enum OperandConstraint {
    /// This operand has a concrete value type.
    Concrete(Type),

    /// This operand can vary freely within the given type set.
    /// The type set is identified by its index into the TYPE_SETS constant table.
    Free(u8),

    /// This operand is the same type as the controlling type variable.
    Same,

    /// This operand is `ctrlType.lane_type()`.
    LaneOf,

    /// This operand is `ctrlType.as_bool()`.
    AsBool,

    /// This operand is `ctrlType.half_width()`.
    HalfWidth,

    /// This operand is `ctrlType.double_width()`.
    DoubleWidth,

    /// This operand is `ctrlType.half_vector()`.
    HalfVector,

    /// This operand is `ctrlType.double_vector()`.
    DoubleVector,
}
|
||||
|
||||
impl OperandConstraint {
    /// Resolve this operand constraint into a concrete value type, given the value of the
    /// controlling type variable.
    ///
    /// Most variants map to a single `Bound` type derived from `ctrl_type`; the `Free`
    /// variant instead exposes the whole permitted `ValueTypeSet`.
    ///
    /// # Panics
    ///
    /// Panics (via `expect`) when the derived type doesn't exist for `ctrl_type`, e.g.
    /// halving the width of a type that has no narrower version.
    pub fn resolve(&self, ctrl_type: Type) -> ResolvedConstraint {
        use self::OperandConstraint::*;
        use self::ResolvedConstraint::Bound;
        match *self {
            Concrete(t) => Bound(t),
            // Look up the referenced type set by index; `vts` must be in bounds.
            Free(vts) => ResolvedConstraint::Free(TYPE_SETS[vts as usize]),
            Same => Bound(ctrl_type),
            LaneOf => Bound(ctrl_type.lane_type()),
            AsBool => Bound(ctrl_type.as_bool()),
            HalfWidth => Bound(ctrl_type.half_width().expect("invalid type for half_width")),
            DoubleWidth => Bound(ctrl_type.double_width().expect(
                "invalid type for double_width",
            )),
            HalfVector => Bound(ctrl_type.half_vector().expect(
                "invalid type for half_vector",
            )),
            // NOTE(review): doubles the lane count via `by(2)` rather than a `double_vector()`
            // method — presumably equivalent to the variant's documented meaning; confirm.
            DoubleVector => Bound(ctrl_type.by(2).expect("invalid type for double_vector")),
        }
    }
}
|
||||
|
||||
/// The type constraint on a value argument once the controlling type variable is known.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ResolvedConstraint {
    /// The operand is bound to a known type.
    Bound(Type),
    /// The operand type can vary freely within the given set.
    Free(ValueTypeSet),
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    /// Basic `Opcode` properties: equality, `Debug`/`Display` formatting, the string
    /// matcher, and the size of `Option<Opcode>`.
    #[test]
    fn opcodes() {
        use std::mem;

        let x = Opcode::Iadd;
        let mut y = Opcode::Isub;

        assert!(x != y);
        y = Opcode::Iadd;
        assert_eq!(x, y);
        assert_eq!(x.format(), InstructionFormat::Binary);

        // `Debug` uses the variant name, `Display` the snake_case mnemonic.
        assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
        assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm");

        // Check the matcher.
        assert_eq!("iadd".parse::<Opcode>(), Ok(Opcode::Iadd));
        assert_eq!("iadd_imm".parse::<Opcode>(), Ok(Opcode::IaddImm));
        // Embedded NULs and empty strings must be rejected, not mis-matched.
        assert_eq!("iadd\0".parse::<Opcode>(), Err("Unknown opcode"));
        assert_eq!("".parse::<Opcode>(), Err("Unknown opcode"));
        assert_eq!("\0".parse::<Opcode>(), Err("Unknown opcode"));

        // Opcode is a single byte, and because Option<Opcode> originally came to 2 bytes, early on
        // Opcode included a variant NotAnOpcode to avoid the unnecessary bloat. Since then the Rust
        // compiler has brought in NonZero optimization, meaning that an enum not using the 0 value
        // can be optional for no size cost. We want to ensure Option<Opcode> remains small.
        assert_eq!(mem::size_of::<Opcode>(), mem::size_of::<Option<Opcode>>());
    }

    /// Guard against accidental growth of `InstructionData`.
    #[test]
    fn instruction_data() {
        use std::mem;
        // The size of the `InstructionData` enum is important for performance. It should not
        // exceed 16 bytes. Use `Box<FooData>` out-of-line payloads for instruction formats that
        // require more space than that. It would be fine with a data structure smaller than 16
        // bytes, but what are the odds of that?
        assert_eq!(mem::size_of::<InstructionData>(), 16);
    }

    /// Exercise `OpcodeConstraints` for a few representative opcodes.
    #[test]
    fn constraints() {
        // `iadd`: typevar-controlled binary op, result and both args bound to the typevar.
        let a = Opcode::Iadd.constraints();
        assert!(a.use_typevar_operand());
        assert!(!a.requires_typevar_operand());
        assert_eq!(a.fixed_results(), 1);
        assert_eq!(a.fixed_value_arguments(), 2);
        assert_eq!(a.result_type(0, types::I32), types::I32);
        assert_eq!(a.result_type(0, types::I8), types::I8);
        assert_eq!(
            a.value_argument_constraint(0, types::I32),
            ResolvedConstraint::Bound(types::I32)
        );
        assert_eq!(
            a.value_argument_constraint(1, types::I32),
            ResolvedConstraint::Bound(types::I32)
        );

        // `bitcast`: the input operand is free within a type set, not bound to the typevar.
        let b = Opcode::Bitcast.constraints();
        assert!(!b.use_typevar_operand());
        assert!(!b.requires_typevar_operand());
        assert_eq!(b.fixed_results(), 1);
        assert_eq!(b.fixed_value_arguments(), 1);
        assert_eq!(b.result_type(0, types::I32), types::I32);
        assert_eq!(b.result_type(0, types::I8), types::I8);
        match b.value_argument_constraint(0, types::I32) {
            ResolvedConstraint::Free(vts) => assert!(vts.contains(types::F32)),
            _ => panic!("Unexpected constraint from value_argument_constraint"),
        }

        // `call`: all results/arguments come from the signature, none are fixed.
        let c = Opcode::Call.constraints();
        assert_eq!(c.fixed_results(), 0);
        assert_eq!(c.fixed_value_arguments(), 0);

        // `call_indirect`: one fixed argument (the callee pointer).
        let i = Opcode::CallIndirect.constraints();
        assert_eq!(i.fixed_results(), 0);
        assert_eq!(i.fixed_value_arguments(), 1);

        // `icmp`: the typevar can only be inferred from an operand.
        let cmp = Opcode::Icmp.constraints();
        assert!(cmp.use_typevar_operand());
        assert!(cmp.requires_typevar_operand());
        assert_eq!(cmp.fixed_results(), 1);
        assert_eq!(cmp.fixed_value_arguments(), 2);
    }

    /// Membership and `example()` behavior of `ValueTypeSet` for hand-built sets.
    #[test]
    fn value_set() {
        use ir::types::*;

        // Int/bool lanes from 16/8 bits up, any lane count up to 128.
        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(0, 8),
            ints: BitSet8::from_range(4, 7),
            floats: BitSet8::from_range(0, 0),
            bools: BitSet8::from_range(3, 7),
        };
        assert!(!vts.contains(I8));
        assert!(vts.contains(I32));
        assert!(vts.contains(I64));
        assert!(vts.contains(I32X4));
        assert!(!vts.contains(F32));
        assert!(!vts.contains(B1));
        assert!(vts.contains(B8));
        assert!(vts.contains(B64));
        assert_eq!(vts.example().to_string(), "i32");

        // Ints excluded: the example falls through to a float.
        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(0, 8),
            ints: BitSet8::from_range(0, 0),
            floats: BitSet8::from_range(5, 7),
            bools: BitSet8::from_range(3, 7),
        };
        assert_eq!(vts.example().to_string(), "f32");

        // Minimum lane count of 2: the example is a vector.
        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(1, 8),
            ints: BitSet8::from_range(0, 0),
            floats: BitSet8::from_range(5, 7),
            bools: BitSet8::from_range(3, 7),
        };
        assert_eq!(vts.example().to_string(), "f32x2");

        // Bools only, minimum of 4 lanes.
        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(2, 8),
            ints: BitSet8::from_range(0, 0),
            floats: BitSet8::from_range(0, 0),
            bools: BitSet8::from_range(3, 7),
        };
        assert!(!vts.contains(B32X2));
        assert!(vts.contains(B32X4));
        assert_eq!(vts.example().to_string(), "b32x4");

        let vts = ValueTypeSet {
            // TypeSet(lanes=(1, 256), ints=(8, 64))
            lanes: BitSet16::from_range(0, 9),
            ints: BitSet8::from_range(3, 7),
            floats: BitSet8::from_range(0, 0),
            bools: BitSet8::from_range(0, 0),
        };
        assert!(vts.contains(I32));
        assert!(vts.contains(I32X4));
    }
}
|
||||
180
lib/codegen/src/ir/jumptable.rs
Normal file
180
lib/codegen/src/ir/jumptable.rs
Normal file
@@ -0,0 +1,180 @@
|
||||
//! Jump table representation.
|
||||
//!
|
||||
//! Jump tables are declared in the preamble and assigned an `ir::entities::JumpTable` reference.
|
||||
//! The actual table of destinations is stored in a `JumpTableData` struct defined in this module.
|
||||
|
||||
use ir::entities::Ebb;
|
||||
use packed_option::PackedOption;
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::iter;
|
||||
use std::slice;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Contents of a jump table.
///
/// All jump tables use 0-based indexing and are expected to be densely populated. They don't need
/// to be completely populated, though. Individual entries can be missing.
#[derive(Clone)]
pub struct JumpTableData {
    // Table entries, using `None` as a placeholder for missing entries.
    table: Vec<PackedOption<Ebb>>,

    // How many `None` holes in table?
    // Kept in sync by `set_entry` / `clear_entry` so hole counting is O(1).
    holes: usize,
}
|
||||
|
||||
impl JumpTableData {
|
||||
/// Create a new empty jump table.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
table: Vec::new(),
|
||||
holes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new empty jump table with the specified capacity.
|
||||
pub fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
table: Vec::with_capacity(capacity),
|
||||
holes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of table entries.
|
||||
pub fn len(&self) -> usize {
|
||||
self.table.len()
|
||||
}
|
||||
|
||||
/// Set a table entry.
|
||||
///
|
||||
/// The table will grow as needed to fit `idx`.
|
||||
pub fn set_entry(&mut self, idx: usize, dest: Ebb) {
|
||||
// Resize table to fit `idx`.
|
||||
if idx >= self.table.len() {
|
||||
self.holes += idx - self.table.len();
|
||||
self.table.resize(idx + 1, None.into());
|
||||
} else if self.table[idx].is_none() {
|
||||
// We're filling in an existing hole.
|
||||
self.holes -= 1;
|
||||
}
|
||||
self.table[idx] = dest.into();
|
||||
}
|
||||
|
||||
/// Append a table entry.
|
||||
pub fn push_entry(&mut self, dest: Ebb) {
|
||||
self.table.push(dest.into())
|
||||
}
|
||||
|
||||
/// Clear a table entry.
|
||||
///
|
||||
/// The `br_table` instruction will fall through if given an index corresponding to a cleared
|
||||
/// table entry.
|
||||
pub fn clear_entry(&mut self, idx: usize) {
|
||||
if idx < self.table.len() && self.table[idx].is_some() {
|
||||
self.holes += 1;
|
||||
self.table[idx] = None.into();
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the entry for `idx`, or `None`.
|
||||
pub fn get_entry(&self, idx: usize) -> Option<Ebb> {
|
||||
self.table.get(idx).and_then(|e| e.expand())
|
||||
}
|
||||
|
||||
/// Enumerate over all `(idx, dest)` pairs in the table in order.
|
||||
///
|
||||
/// This returns an iterator that skips any empty slots in the table.
|
||||
pub fn entries(&self) -> Entries {
|
||||
Entries(self.table.iter().cloned().enumerate())
|
||||
}
|
||||
|
||||
/// Checks if any of the entries branch to `ebb`.
|
||||
pub fn branches_to(&self, ebb: Ebb) -> bool {
|
||||
self.table.iter().any(|target_ebb| {
|
||||
target_ebb.expand() == Some(ebb)
|
||||
})
|
||||
}
|
||||
|
||||
/// Access the whole table as a mutable slice.
|
||||
pub fn as_mut_slice(&mut self) -> &mut [PackedOption<Ebb>] {
|
||||
self.table.as_mut_slice()
|
||||
}
|
||||
}
|
||||
|
||||
/// Enumerate `(idx, dest)` pairs in order.
|
||||
pub struct Entries<'a>(iter::Enumerate<iter::Cloned<slice::Iter<'a, PackedOption<Ebb>>>>);
|
||||
|
||||
impl<'a> Iterator for Entries<'a> {
|
||||
type Item = (usize, Ebb);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
if let Some((idx, dest)) = self.0.next() {
|
||||
if let Some(ebb) = dest.expand() {
|
||||
return Some((idx, ebb));
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JumpTableData {
    /// Write the textual IR form, e.g. `jump_table ebb2, 0, ebb1`.
    ///
    /// Empty slots are printed as `0`; an empty table prints as `jump_table 0`
    /// (the missing first entry is rendered the same way as a hole).
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        // The first entry has no leading comma, so it is handled separately.
        match self.table.first().and_then(|e| e.expand()) {
            None => write!(fmt, "jump_table 0")?,
            Some(first) => write!(fmt, "jump_table {}", first)?,
        }

        for dest in self.table.iter().skip(1).map(|e| e.expand()) {
            match dest {
                None => write!(fmt, ", 0")?,
                Some(ebb) => write!(fmt, ", {}", ebb)?,
            }
        }
        Ok(())
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::JumpTableData;
    use entity::EntityRef;
    use ir::Ebb;
    use std::string::ToString;
    use std::vec::Vec;

    /// An empty table: lookups return `None`, display shows a single `0`,
    /// and the entries iterator is empty.
    #[test]
    fn empty() {
        let jt = JumpTableData::new();

        assert_eq!(jt.get_entry(0), None);
        assert_eq!(jt.get_entry(10), None);

        assert_eq!(jt.to_string(), "jump_table 0");

        let v: Vec<(usize, Ebb)> = jt.entries().collect();
        assert_eq!(v, []);
    }

    /// `set_entry` overwrites existing slots and grows the table, creating holes.
    #[test]
    fn insert() {
        let e1 = Ebb::new(1);
        let e2 = Ebb::new(2);

        let mut jt = JumpTableData::new();

        jt.set_entry(0, e1);
        // Overwrites the previous entry at index 0.
        jt.set_entry(0, e2);
        // Grows the table, leaving holes at indices 1..10.
        jt.set_entry(10, e1);

        assert_eq!(
            jt.to_string(),
            "jump_table ebb2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ebb1"
        );

        // The entries iterator skips the holes.
        let v: Vec<(usize, Ebb)> = jt.entries().collect();
        assert_eq!(v, [(0, e2), (10, e1)]);
    }
}
|
||||
1173
lib/codegen/src/ir/layout.rs
Normal file
1173
lib/codegen/src/ir/layout.rs
Normal file
File diff suppressed because it is too large
Load Diff
115
lib/codegen/src/ir/libcall.rs
Normal file
115
lib/codegen/src/ir/libcall.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
//! Naming well-known routines in the runtime library.
|
||||
|
||||
use ir::{types, Opcode, Type};
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// The name of a runtime library routine.
///
/// Runtime library calls are generated for Cretonne IR instructions that don't have an equivalent
/// ISA instruction or an easy macro expansion. A `LibCall` is used as a well-known name to refer to
/// the runtime library routine. This way, Cretonne doesn't have to know about the naming
/// convention in the embedding VM's runtime library.
///
/// This list is likely to grow over time.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LibCall {
    /// ceil.f32
    CeilF32,
    /// ceil.f64
    CeilF64,
    /// floor.f32
    FloorF32,
    /// floor.f64
    FloorF64,
    /// trunc.f32
    TruncF32,
    /// trunc.f64
    TruncF64,
    /// nearest.f32
    NearestF32,
    /// nearest.f64
    NearestF64,
}

/// Routine names, indexed by the `LibCall` discriminant.
/// Must stay in the same order as the enum variants above.
const NAME: [&str; 8] = [
    "CeilF32",
    "CeilF64",
    "FloorF32",
    "FloorF64",
    "TruncF32",
    "TruncF64",
    "NearestF32",
    "NearestF64",
];

impl fmt::Display for LibCall {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The discriminant doubles as an index into the parallel `NAME` table.
        f.write_str(NAME[*self as usize])
    }
}

impl FromStr for LibCall {
    type Err = ();

    /// Parse a `LibCall` from its `Display` name; unknown names yield `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "CeilF32" => Ok(LibCall::CeilF32),
            "CeilF64" => Ok(LibCall::CeilF64),
            "FloorF32" => Ok(LibCall::FloorF32),
            "FloorF64" => Ok(LibCall::FloorF64),
            "TruncF32" => Ok(LibCall::TruncF32),
            "TruncF64" => Ok(LibCall::TruncF64),
            "NearestF32" => Ok(LibCall::NearestF32),
            "NearestF64" => Ok(LibCall::NearestF64),
            _ => Err(()),
        }
    }
}
|
||||
|
||||
impl LibCall {
    /// Get the well-known library call name to use as a replacement for an instruction with the
    /// given opcode and controlling type variable.
    ///
    /// Returns `None` if no well-known library routine name exists for that instruction.
    ///
    /// Only the float rounding opcodes (`ceil`, `floor`, `trunc`, `nearest`) on `f32`/`f64`
    /// currently map to library calls; everything else yields `None`.
    pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<LibCall> {
        Some(match ctrl_type {
            types::F32 => {
                match opcode {
                    Opcode::Ceil => LibCall::CeilF32,
                    Opcode::Floor => LibCall::FloorF32,
                    Opcode::Trunc => LibCall::TruncF32,
                    Opcode::Nearest => LibCall::NearestF32,
                    _ => return None,
                }
            }
            types::F64 => {
                match opcode {
                    Opcode::Ceil => LibCall::CeilF64,
                    Opcode::Floor => LibCall::FloorF64,
                    Opcode::Trunc => LibCall::TruncF64,
                    Opcode::Nearest => LibCall::NearestF64,
                    _ => return None,
                }
            }
            _ => return None,
        })
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use std::string::ToString;

    /// `Display` uses the variant name verbatim.
    #[test]
    fn display() {
        assert_eq!(LibCall::CeilF32.to_string(), "CeilF32");
        assert_eq!(LibCall::NearestF64.to_string(), "NearestF64");
    }

    /// `FromStr` round-trips the `Display` name.
    #[test]
    fn parsing() {
        assert_eq!("FloorF32".parse(), Ok(LibCall::FloorF32));
    }
}
|
||||
93
lib/codegen/src/ir/memflags.rs
Normal file
93
lib/codegen/src/ir/memflags.rs
Normal file
@@ -0,0 +1,93 @@
|
||||
//! Memory operation flags.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
/// Bit position of each flag inside `MemFlags::bits`.
enum FlagBit {
    Notrap,
    Aligned,
}

/// Textual flag names, indexed by the corresponding `FlagBit` discriminant.
const NAMES: [&str; 2] = ["notrap", "aligned"];

/// Flags for memory operations like load/store.
///
/// Each of these flags introduce a limited form of undefined behavior. The flags each enable
/// certain optimizations that need to make additional assumptions. Generally, the semantics of a
/// program does not change when a flag is removed, but adding a flag will.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct MemFlags {
    bits: u8,
}

impl MemFlags {
    /// Create a new empty set of flags.
    pub fn new() -> Self {
        Self { bits: 0 }
    }

    /// Read a flag bit.
    fn read(self, bit: FlagBit) -> bool {
        self.bits & (1 << bit as usize) != 0
    }

    /// Set a flag bit.
    fn set(&mut self, bit: FlagBit) {
        self.bits |= 1 << bit as usize
    }

    /// Set a flag bit by name.
    ///
    /// Returns true if the flag was found and set, false for an unknown flag name.
    pub fn set_by_name(&mut self, name: &str) -> bool {
        // The position of `name` in `NAMES` is the bit number, by construction.
        match NAMES.iter().position(|&s| s == name) {
            Some(bit) => {
                self.bits |= 1 << bit;
                true
            }
            None => false,
        }
    }

    /// Test if the `notrap` flag is set.
    ///
    /// Normally, trapping is part of the semantics of a load/store operation. If the platform
    /// would cause a trap when accessing the effective address, the Cretonne memory operation is
    /// also required to trap.
    ///
    /// The `notrap` flag tells Cretonne that the memory is *accessible*, which means that
    /// accesses will not trap. This makes it possible to delete an unused load or a dead store
    /// instruction.
    pub fn notrap(self) -> bool {
        self.read(FlagBit::Notrap)
    }

    /// Set the `notrap` flag.
    pub fn set_notrap(&mut self) {
        self.set(FlagBit::Notrap)
    }

    /// Test if the `aligned` flag is set.
    ///
    /// By default, Cretonne memory instructions work with any unaligned effective address. If the
    /// `aligned` flag is set, the instruction is permitted to trap or return a wrong result if the
    /// effective address is misaligned.
    pub fn aligned(self) -> bool {
        self.read(FlagBit::Aligned)
    }

    /// Set the `aligned` flag.
    pub fn set_aligned(&mut self) {
        self.set(FlagBit::Aligned)
    }
}

impl Default for MemFlags {
    /// An empty flag set, identical to `MemFlags::new()`.
    ///
    /// Provided so `MemFlags` follows the standard convention for types with a
    /// no-argument constructor (clippy `new_without_default`).
    fn default() -> Self {
        Self::new()
    }
}

impl fmt::Display for MemFlags {
    /// Write each set flag preceded by a space, e.g. " notrap aligned".
    /// An empty set writes nothing.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for (i, n) in NAMES.iter().enumerate() {
            if self.bits & (1 << i) != 0 {
                write!(f, " {}", n)?;
            }
        }
        Ok(())
    }
}
|
||||
63
lib/codegen/src/ir/mod.rs
Normal file
63
lib/codegen/src/ir/mod.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Representation of Cretonne IR functions.
|
||||
|
||||
mod builder;
|
||||
pub mod condcodes;
|
||||
pub mod dfg;
|
||||
pub mod entities;
|
||||
mod extfunc;
|
||||
mod extname;
|
||||
pub mod function;
|
||||
mod globalvar;
|
||||
mod heap;
|
||||
pub mod immediates;
|
||||
pub mod instructions;
|
||||
pub mod jumptable;
|
||||
pub mod layout;
|
||||
mod libcall;
|
||||
mod memflags;
|
||||
mod progpoint;
|
||||
mod sourceloc;
|
||||
pub mod stackslot;
|
||||
mod trapcode;
|
||||
pub mod types;
|
||||
mod valueloc;
|
||||
|
||||
pub use ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase};
|
||||
pub use ir::dfg::{DataFlowGraph, ValueDef};
|
||||
pub use ir::entities::{Ebb, FuncRef, GlobalVar, Heap, Inst, JumpTable, SigRef, StackSlot, Value};
|
||||
pub use ir::extfunc::{AbiParam, ArgumentExtension, ArgumentPurpose, CallConv, ExtFuncData,
|
||||
Signature};
|
||||
pub use ir::extname::ExternalName;
|
||||
pub use ir::function::Function;
|
||||
pub use ir::globalvar::GlobalVarData;
|
||||
pub use ir::heap::{HeapBase, HeapData, HeapStyle};
|
||||
pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs};
|
||||
pub use ir::jumptable::JumpTableData;
|
||||
pub use ir::layout::Layout;
|
||||
pub use ir::libcall::LibCall;
|
||||
pub use ir::memflags::MemFlags;
|
||||
pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
|
||||
pub use ir::sourceloc::SourceLoc;
|
||||
pub use ir::stackslot::{StackSlotData, StackSlotKind, StackSlots};
|
||||
pub use ir::trapcode::TrapCode;
|
||||
pub use ir::types::Type;
|
||||
pub use ir::valueloc::{ArgumentLoc, ValueLoc};
|
||||
|
||||
use binemit;
|
||||
use entity::{EntityMap, PrimaryMap};
|
||||
use isa;
|
||||
|
||||
/// Map of value locations.
|
||||
pub type ValueLocations = EntityMap<Value, ValueLoc>;
|
||||
|
||||
/// Map of jump tables.
|
||||
pub type JumpTables = PrimaryMap<JumpTable, JumpTableData>;
|
||||
|
||||
/// Map of instruction encodings.
|
||||
pub type InstEncodings = EntityMap<Inst, isa::Encoding>;
|
||||
|
||||
/// Code offsets for EBBs.
|
||||
pub type EbbOffsets = EntityMap<Ebb, binemit::CodeOffset>;
|
||||
|
||||
/// Source locations for instructions.
|
||||
pub type SourceLocs = EntityMap<Inst, SourceLoc>;
|
||||
164
lib/codegen/src/ir/progpoint.rs
Normal file
164
lib/codegen/src/ir/progpoint.rs
Normal file
@@ -0,0 +1,164 @@
|
||||
//! Program points.
|
||||
|
||||
use entity::EntityRef;
|
||||
use ir::{Ebb, Inst, ValueDef};
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::u32;
|
||||
|
||||
/// A `ProgramPoint` represents a position in a function where the live range of an SSA value can
/// begin or end. It can be either:
///
/// 1. An instruction or
/// 2. An EBB header.
///
/// This corresponds more or less to the lines in the textual form of Cretonne IR.
// Encoding: an instruction index `i` is stored as `2 * i`, an EBB index `e` as `2 * e + 1`,
// so the low bit is the variant tag (decoded in `From<ProgramPoint> for ExpandedProgramPoint`).
#[derive(PartialEq, Eq, Clone, Copy)]
pub struct ProgramPoint(u32);

impl From<Inst> for ProgramPoint {
    fn from(inst: Inst) -> ProgramPoint {
        let idx = inst.index();
        // The index must fit in 31 bits so the doubling plus tag bit cannot overflow.
        debug_assert!(idx < (u32::MAX / 2) as usize);
        ProgramPoint((idx * 2) as u32)
    }
}

impl From<Ebb> for ProgramPoint {
    fn from(ebb: Ebb) -> ProgramPoint {
        let idx = ebb.index();
        // Same 31-bit bound as above; the `+ 1` sets the EBB tag bit.
        debug_assert!(idx < (u32::MAX / 2) as usize);
        ProgramPoint((idx * 2 + 1) as u32)
    }
}

impl From<ValueDef> for ProgramPoint {
    /// A value defined by an instruction maps to that instruction's point;
    /// an EBB parameter maps to its EBB header's point.
    fn from(def: ValueDef) -> ProgramPoint {
        match def {
            ValueDef::Result(inst, _) => inst.into(),
            ValueDef::Param(ebb, _) => ebb.into(),
        }
    }
}
|
||||
|
||||
/// An expanded program point directly exposes the variants, but takes twice the space to
/// represent.
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum ExpandedProgramPoint {
    /// An instruction in the function.
    Inst(Inst),
    /// An EBB header.
    Ebb(Ebb),
}

impl ExpandedProgramPoint {
    /// Get the instruction we know is inside.
    ///
    /// # Panics
    ///
    /// Panics if this program point is actually an EBB header.
    pub fn unwrap_inst(self) -> Inst {
        match self {
            ExpandedProgramPoint::Inst(x) => x,
            ExpandedProgramPoint::Ebb(x) => panic!("expected inst: {}", x),
        }
    }
}
|
||||
|
||||
impl From<Inst> for ExpandedProgramPoint {
    fn from(inst: Inst) -> ExpandedProgramPoint {
        ExpandedProgramPoint::Inst(inst)
    }
}

impl From<Ebb> for ExpandedProgramPoint {
    fn from(ebb: Ebb) -> ExpandedProgramPoint {
        ExpandedProgramPoint::Ebb(ebb)
    }
}

impl From<ValueDef> for ExpandedProgramPoint {
    /// Same mapping as `From<ValueDef> for ProgramPoint`: instruction results map to the
    /// instruction, EBB parameters to the EBB header.
    fn from(def: ValueDef) -> ExpandedProgramPoint {
        match def {
            ValueDef::Result(inst, _) => inst.into(),
            ValueDef::Param(ebb, _) => ebb.into(),
        }
    }
}

impl From<ProgramPoint> for ExpandedProgramPoint {
    /// Decode the packed representation: the low bit is the tag (0 = instruction,
    /// 1 = EBB header) and the remaining bits are the entity index.
    fn from(pp: ProgramPoint) -> ExpandedProgramPoint {
        if pp.0 & 1 == 0 {
            ExpandedProgramPoint::Inst(Inst::new((pp.0 / 2) as usize))
        } else {
            ExpandedProgramPoint::Ebb(Ebb::new((pp.0 / 2) as usize))
        }
    }
}
|
||||
|
||||
impl fmt::Display for ExpandedProgramPoint {
    /// Delegate to the inner entity's `Display` (e.g. "inst5" or "ebb3").
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            ExpandedProgramPoint::Inst(x) => write!(f, "{}", x),
            ExpandedProgramPoint::Ebb(x) => write!(f, "{}", x),
        }
    }
}

impl fmt::Display for ProgramPoint {
    /// Expand the packed point first, then use the expanded form's `Display`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let epp: ExpandedProgramPoint = (*self).into();
        epp.fmt(f)
    }
}

impl fmt::Debug for ExpandedProgramPoint {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "ExpandedProgramPoint({})", self)
    }
}

impl fmt::Debug for ProgramPoint {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "ProgramPoint({})", self)
    }
}
|
||||
|
||||
/// Context for ordering program points.
///
/// `ProgramPoint` objects don't carry enough information to be ordered independently, they need a
/// context providing the program order.
pub trait ProgramOrder {
    /// Compare the program points `a` and `b` relative to this program order.
    ///
    /// Return `Less` if `a` appears in the program before `b`.
    ///
    /// This is declared as a generic such that it can be called with `Inst` and `Ebb` arguments
    /// directly. Depending on the implementation, there is a good chance performance will be
    /// improved for those cases where the type of either argument is known statically.
    fn cmp<A, B>(&self, a: A, b: B) -> cmp::Ordering
    where
        A: Into<ExpandedProgramPoint>,
        B: Into<ExpandedProgramPoint>;

    /// Is the range from `inst` to `ebb` just the gap between consecutive EBBs?
    ///
    /// This returns true if `inst` is the terminator in the EBB immediately before `ebb`.
    fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool;
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use entity::EntityRef;
    use ir::{Ebb, Inst};
    use std::string::ToString;

    /// Converting entities into `ProgramPoint` preserves their display identity.
    #[test]
    fn convert() {
        let i5 = Inst::new(5);
        let b3 = Ebb::new(3);

        let pp1: ProgramPoint = i5.into();
        let pp2: ProgramPoint = b3.into();

        // The packed encoding round-trips through Display via ExpandedProgramPoint.
        assert_eq!(pp1.to_string(), "inst5");
        assert_eq!(pp2.to_string(), "ebb3");
    }
}
|
||||
63
lib/codegen/src/ir/sourceloc.rs
Normal file
63
lib/codegen/src/ir/sourceloc.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Source locations.
|
||||
//!
|
||||
//! Cretonne tracks the original source location of each instruction, and preserves the source
|
||||
//! location when instructions are transformed.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
/// A source location.
///
/// An opaque 32-bit tag attached to each Cretonne IR instruction. Cretonne never interprets
/// these values; they are carried from the input through to the output unchanged.
///
/// The default source location is the all-ones bit pattern `!0`, reserved for instructions
/// that have no real source location.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SourceLoc(u32);

impl SourceLoc {
    /// Create a new source location with the given bits.
    pub fn new(bits: u32) -> SourceLoc {
        SourceLoc(bits)
    }

    /// Is this the default (all-ones) source location?
    pub fn is_default(self) -> bool {
        self == Default::default()
    }

    /// Read the bits of this source location.
    pub fn bits(self) -> u32 {
        self.0
    }
}

impl Default for SourceLoc {
    /// The reserved "no location" value, `!0`.
    fn default() -> Self {
        SourceLoc(!0)
    }
}

impl fmt::Display for SourceLoc {
    /// Print as `@-` for the default location, otherwise `@` followed by at
    /// least four lowercase hex digits.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.is_default() {
            f.write_str("@-")
        } else {
            write!(f, "@{:04x}", self.0)
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use ir::SourceLoc;
    use std::string::ToString;

    /// The default location prints `@-`; real locations print zero-padded hex.
    #[test]
    fn display() {
        assert_eq!(SourceLoc::default().to_string(), "@-");
        assert_eq!(SourceLoc::new(0).to_string(), "@0000");
        assert_eq!(SourceLoc::new(16).to_string(), "@0010");
        // Values wider than four hex digits are printed in full.
        assert_eq!(SourceLoc::new(0xabcdef).to_string(), "@abcdef");
    }
}
|
||||
425
lib/codegen/src/ir/stackslot.rs
Normal file
425
lib/codegen/src/ir/stackslot.rs
Normal file
@@ -0,0 +1,425 @@
|
||||
//! Stack slots.
|
||||
//!
|
||||
//! The `StackSlotData` struct keeps track of a single stack slot in a function.
|
||||
//!
|
||||
|
||||
use entity::{Iter, IterMut, Keys, PrimaryMap};
|
||||
use ir::{StackSlot, Type};
|
||||
use packed_option::PackedOption;
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::ops::{Index, IndexMut};
|
||||
use std::slice;
|
||||
use std::str::FromStr;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// The size of an object on the stack, or the size of a stack frame.
///
/// We don't use `usize` to represent object sizes on the target platform because Cretonne supports
/// cross-compilation, and `usize` is a type that depends on the host platform, not the target
/// platform.
pub type StackSize = u32;

/// A stack offset.
///
/// The location of a stack offset relative to a stack pointer or frame pointer.
pub type StackOffset = i32;

/// The minimum size of a spill slot in bytes.
///
/// ISA implementations are allowed to assume that small types like `b1` and `i8` get a full 4-byte
/// spill slot.
const MIN_SPILL_SLOT_SIZE: StackSize = 4;

/// Get the spill slot size to use for `ty`.
///
/// This is the type's byte size, rounded up to the 4-byte minimum.
fn spill_size(ty: Type) -> StackSize {
    cmp::max(MIN_SPILL_SLOT_SIZE, ty.bytes())
}
|
||||
|
||||
/// The kind of a stack slot.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackSlotKind {
    /// A spill slot. This is a stack slot created by the register allocator.
    SpillSlot,

    /// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load`
    /// and `stack_store` instructions.
    ExplicitSlot,

    /// An incoming function argument.
    ///
    /// If the current function has more arguments than fits in registers, the remaining arguments
    /// are passed on the stack by the caller. These incoming arguments are represented as SSA
    /// values assigned to incoming stack slots.
    IncomingArg,

    /// An outgoing function argument.
    ///
    /// When preparing to call a function whose arguments don't fit in registers, outgoing argument
    /// stack slots are used to represent individual arguments in the outgoing call frame. These
    /// stack slots are only valid while setting up a call.
    OutgoingArg,

    /// An emergency spill slot.
    ///
    /// Emergency slots are allocated late when the register's constraint solver needs extra space
    /// to shuffle registers around. The are only used briefly, and can be reused.
    EmergencySlot,
}
|
||||
|
||||
impl FromStr for StackSlotKind {
    type Err = ();

    /// Parse the snake_case keyword used in the textual IR; the accepted strings
    /// mirror the `Display` impl below. Unknown strings yield `Err(())`.
    fn from_str(s: &str) -> Result<StackSlotKind, ()> {
        use self::StackSlotKind::*;
        match s {
            "explicit_slot" => Ok(ExplicitSlot),
            "spill_slot" => Ok(SpillSlot),
            "incoming_arg" => Ok(IncomingArg),
            "outgoing_arg" => Ok(OutgoingArg),
            "emergency_slot" => Ok(EmergencySlot),
            _ => Err(()),
        }
    }
}
|
||||
|
||||
impl fmt::Display for StackSlotKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::StackSlotKind::*;
|
||||
f.write_str(match *self {
|
||||
ExplicitSlot => "explicit_slot",
|
||||
SpillSlot => "spill_slot",
|
||||
IncomingArg => "incoming_arg",
|
||||
OutgoingArg => "outgoing_arg",
|
||||
EmergencySlot => "emergency_slot",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Contents of a stack slot.
#[derive(Clone, Debug)]
pub struct StackSlotData {
    /// The kind of stack slot.
    pub kind: StackSlotKind,

    /// Size of stack slot in bytes.
    pub size: StackSize,

    /// Offset of stack slot relative to the stack pointer in the caller.
    ///
    /// On x86, the base address is the stack pointer *before* the return address was pushed. On
    /// RISC ISAs, the base address is the value of the stack pointer on entry to the function.
    ///
    /// For `OutgoingArg` stack slots, the offset is relative to the current function's stack
    /// pointer immediately before the call.
    ///
    /// `None` until an offset is assigned (see `StackSlots::set_offset`).
    pub offset: Option<StackOffset>,
}
impl StackSlotData {
|
||||
/// Create a stack slot with the specified byte size.
|
||||
pub fn new(kind: StackSlotKind, size: StackSize) -> StackSlotData {
|
||||
StackSlotData {
|
||||
kind,
|
||||
size,
|
||||
offset: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the alignment in bytes of this stack slot given the stack pointer alignment.
|
||||
pub fn alignment(&self, max_align: StackSize) -> StackSize {
|
||||
debug_assert!(max_align.is_power_of_two());
|
||||
// We want to find the largest power of two that divides both `self.size` and `max_align`.
|
||||
// That is the same as isolating the rightmost bit in `x`.
|
||||
let x = self.size | max_align;
|
||||
// C.f. Hacker's delight.
|
||||
x & x.wrapping_neg()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for StackSlotData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{} {}", self.kind, self.size)?;
|
||||
if let Some(offset) = self.offset {
|
||||
write!(f, ", offset {}", offset)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Stack frame manager.
///
/// Keep track of all the stack slots used by a function.
#[derive(Clone, Debug)]
pub struct StackSlots {
    /// All allocated stack slots.
    slots: PrimaryMap<StackSlot, StackSlotData>,

    /// All the outgoing stack slots, ordered by offset.
    ///
    /// Kept sorted by `(offset, size)` so `get_outgoing_arg` can binary search it.
    outgoing: Vec<StackSlot>,

    /// All the emergency slots.
    emergency: Vec<StackSlot>,

    /// The total size of the stack frame.
    ///
    /// This is the distance from the stack pointer in the current function to the stack pointer in
    /// the calling function, so it includes a pushed return address as well as space for outgoing
    /// call arguments.
    ///
    /// This is computed by the `layout()` method.
    pub frame_size: Option<StackSize>,
}
/// Stack slot manager functions that behave mostly like an entity map.
impl StackSlots {
    /// Create an empty stack slot manager.
    pub fn new() -> Self {
        Self {
            slots: PrimaryMap::new(),
            outgoing: Vec::new(),
            emergency: Vec::new(),
            frame_size: None,
        }
    }

    /// Clear out everything.
    ///
    /// Resets all slot storage and the computed frame size.
    pub fn clear(&mut self) {
        self.slots.clear();
        self.outgoing.clear();
        self.emergency.clear();
        self.frame_size = None;
    }

    /// Allocate a new stack slot.
    ///
    /// This function should be primarily used by the text format parser. There are more convenient
    /// functions for creating specific kinds of stack slots below.
    pub fn push(&mut self, data: StackSlotData) -> StackSlot {
        self.slots.push(data)
    }

    /// Check if `ss` is a valid stack slot reference.
    pub fn is_valid(&self, ss: StackSlot) -> bool {
        self.slots.is_valid(ss)
    }

    /// Set the offset of a stack slot.
    pub fn set_offset(&mut self, ss: StackSlot, offset: StackOffset) {
        self.slots[ss].offset = Some(offset);
    }

    /// Get an iterator over all the stack slot keys.
    pub fn iter(&self) -> Iter<StackSlot, StackSlotData> {
        self.slots.iter()
    }

    /// Get an iterator over all the stack slot keys, mutable edition.
    pub fn iter_mut(&mut self) -> IterMut<StackSlot, StackSlotData> {
        self.slots.iter_mut()
    }

    /// Get an iterator over all the stack slot records.
    pub fn values(&self) -> slice::Iter<StackSlotData> {
        self.slots.values()
    }

    /// Get an iterator over all the stack slot records, mutable edition.
    pub fn values_mut(&mut self) -> slice::IterMut<StackSlotData> {
        self.slots.values_mut()
    }

    /// Get an iterator over all the stack slot keys.
    pub fn keys(&self) -> Keys<StackSlot> {
        self.slots.keys()
    }

    /// Get a reference to the next stack slot that would be created by `push()`.
    ///
    /// This should just be used by the parser.
    pub fn next_key(&self) -> StackSlot {
        self.slots.next_key()
    }
}
impl Index<StackSlot> for StackSlots {
    type Output = StackSlotData;

    /// Immutable access to a slot's data via `stack_slots[ss]`.
    fn index(&self, ss: StackSlot) -> &StackSlotData {
        &self.slots[ss]
    }
}
impl IndexMut<StackSlot> for StackSlots {
    /// Mutable access to a slot's data via `stack_slots[ss]`.
    fn index_mut(&mut self, ss: StackSlot) -> &mut StackSlotData {
        &mut self.slots[ss]
    }
}
/// Higher-level stack frame manipulation functions.
|
||||
impl StackSlots {
|
||||
/// Create a new spill slot for spilling values of type `ty`.
|
||||
pub fn make_spill_slot(&mut self, ty: Type) -> StackSlot {
|
||||
self.push(StackSlotData::new(StackSlotKind::SpillSlot, spill_size(ty)))
|
||||
}
|
||||
|
||||
/// Create a stack slot representing an incoming function argument.
|
||||
pub fn make_incoming_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
|
||||
let mut data = StackSlotData::new(StackSlotKind::IncomingArg, ty.bytes());
|
||||
debug_assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
|
||||
data.offset = Some(offset);
|
||||
self.push(data)
|
||||
}
|
||||
|
||||
/// Get a stack slot representing an outgoing argument.
|
||||
///
|
||||
/// This may create a new stack slot, or reuse an existing outgoing stack slot with the
|
||||
/// requested offset and size.
|
||||
///
|
||||
/// The requested offset is relative to this function's stack pointer immediately before making
|
||||
/// the call.
|
||||
pub fn get_outgoing_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
|
||||
let size = ty.bytes();
|
||||
|
||||
// Look for an existing outgoing stack slot with the same offset and size.
|
||||
let inspos = match self.outgoing.binary_search_by_key(&(offset, size), |&ss| {
|
||||
(self[ss].offset.unwrap(), self[ss].size)
|
||||
}) {
|
||||
Ok(idx) => return self.outgoing[idx],
|
||||
Err(idx) => idx,
|
||||
};
|
||||
|
||||
// No existing slot found. Make one and insert it into `outgoing`.
|
||||
let mut data = StackSlotData::new(StackSlotKind::OutgoingArg, size);
|
||||
debug_assert!(offset <= StackOffset::max_value() - size as StackOffset);
|
||||
data.offset = Some(offset);
|
||||
let ss = self.slots.push(data);
|
||||
self.outgoing.insert(inspos, ss);
|
||||
ss
|
||||
}
|
||||
|
||||
/// Get an emergency spill slot that can be used to store a `ty` value.
|
||||
///
|
||||
/// This may allocate a new slot, or it may reuse an existing emergency spill slot, excluding
|
||||
/// any slots in the `in_use` list.
|
||||
pub fn get_emergency_slot(
|
||||
&mut self,
|
||||
ty: Type,
|
||||
in_use: &[PackedOption<StackSlot>],
|
||||
) -> StackSlot {
|
||||
let size = spill_size(ty);
|
||||
|
||||
// Find the smallest existing slot that can fit the type.
|
||||
if let Some(&ss) = self.emergency
|
||||
.iter()
|
||||
.filter(|&&ss| self[ss].size >= size && !in_use.contains(&ss.into()))
|
||||
.min_by_key(|&&ss| self[ss].size)
|
||||
{
|
||||
return ss;
|
||||
}
|
||||
|
||||
// Alternatively, use the largest available slot and make it larger.
|
||||
if let Some(&ss) = self.emergency
|
||||
.iter()
|
||||
.filter(|&&ss| !in_use.contains(&ss.into()))
|
||||
.max_by_key(|&&ss| self[ss].size)
|
||||
{
|
||||
self.slots[ss].size = size;
|
||||
return ss;
|
||||
}
|
||||
|
||||
// No existing slot found. Make one and insert it into `emergency`.
|
||||
let data = StackSlotData::new(StackSlotKind::EmergencySlot, size);
|
||||
let ss = self.slots.push(data);
|
||||
self.emergency.push(ss);
|
||||
ss
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use ir::Function;
    use ir::types;
    use std::string::ToString;

    #[test]
    fn stack_slot() {
        let mut func = Function::new();

        let arg = func.create_stack_slot(StackSlotData::new(StackSlotKind::IncomingArg, 4));
        let spill = func.create_stack_slot(StackSlotData::new(StackSlotKind::SpillSlot, 8));

        // Slots are numbered in creation order.
        assert_eq!(arg.to_string(), "ss0");
        assert_eq!(spill.to_string(), "ss1");

        assert_eq!(func.stack_slots[arg].size, 4);
        assert_eq!(func.stack_slots[spill].size, 8);

        assert_eq!(func.stack_slots[arg].to_string(), "incoming_arg 4");
        assert_eq!(func.stack_slots[spill].to_string(), "spill_slot 8");
    }

    #[test]
    fn outgoing() {
        let mut sss = StackSlots::new();

        let ss0 = sss.get_outgoing_arg(types::I32, 8);
        let ss1 = sss.get_outgoing_arg(types::I32, 4);
        let ss2 = sss.get_outgoing_arg(types::I64, 8);

        assert_eq!(sss[ss0].offset, Some(8));
        assert_eq!(sss[ss0].size, 4);

        assert_eq!(sss[ss1].offset, Some(4));
        assert_eq!(sss[ss1].size, 4);

        assert_eq!(sss[ss2].offset, Some(8));
        assert_eq!(sss[ss2].size, 8);

        // Asking again with the same offset and size reuses the existing slots.
        assert_eq!(sss.get_outgoing_arg(types::I32, 8), ss0);
        assert_eq!(sss.get_outgoing_arg(types::I32, 4), ss1);
        assert_eq!(sss.get_outgoing_arg(types::I64, 8), ss2);
    }

    #[test]
    fn alignment() {
        // An 8-byte slot aligns up to the SP alignment, but never past its own size.
        let spill = StackSlotData::new(StackSlotKind::SpillSlot, 8);

        assert_eq!(spill.alignment(4), 4);
        assert_eq!(spill.alignment(8), 8);
        assert_eq!(spill.alignment(16), 8);

        // 24 = 8 * 3, so the largest power of two dividing it is 8.
        let explicit = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);

        assert_eq!(explicit.alignment(4), 4);
        assert_eq!(explicit.alignment(8), 8);
        assert_eq!(explicit.alignment(16), 8);
        assert_eq!(explicit.alignment(32), 8);
    }

    #[test]
    fn emergency() {
        let mut sss = StackSlots::new();

        let ss0 = sss.get_emergency_slot(types::I32, &[]);
        assert_eq!(sss[ss0].size, 4);

        // When a smaller size is requested, we should simply get the same slot back.
        assert_eq!(sss.get_emergency_slot(types::I8, &[]), ss0);
        assert_eq!(sss[ss0].size, 4);
        assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss0);
        assert_eq!(sss[ss0].size, 4);

        // Ask for a larger size and the slot should grow.
        assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
        assert_eq!(sss[ss0].size, 8);

        // When one slot is in use, we should get a new one.
        let ss1 = sss.get_emergency_slot(types::I32, &[None.into(), ss0.into()]);
        assert_eq!(sss[ss0].size, 8);
        assert_eq!(sss[ss1].size, 4);

        // Now we should get the smallest fit of the two available slots.
        assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss1);
        assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
    }
}
120
lib/codegen/src/ir/trapcode.rs
Normal file
120
lib/codegen/src/ir/trapcode.rs
Normal file
@@ -0,0 +1,120 @@
|
||||
//! Trap codes describing the reason for a trap.
|
||||
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::str::FromStr;
|
||||
|
||||
/// A trap code describing the reason for a trap.
///
/// All trap instructions have an explicit trap code.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum TrapCode {
    /// The current stack space was exhausted.
    ///
    /// On some platforms, a stack overflow may also be indicated by a segmentation fault from the
    /// stack guard page.
    StackOverflow,

    /// A `heap_addr` instruction detected an out-of-bounds error.
    ///
    /// Some out-of-bounds heap accesses are detected by a segmentation fault on the heap guard
    /// pages.
    HeapOutOfBounds,

    /// Other bounds checking error.
    OutOfBounds,

    /// Indirect call to a null table entry.
    IndirectCallToNull,

    /// Signature mismatch on indirect call.
    BadSignature,

    /// An integer arithmetic operation caused an overflow.
    IntegerOverflow,

    /// An integer division by zero.
    IntegerDivisionByZero,

    /// Failed float-to-int conversion.
    BadConversionToInteger,

    /// Execution has potentially run too long and may be interrupted.
    /// This trap is resumable.
    Interrupt,

    /// A user-defined trap code.
    User(u16),
}

impl Display for TrapCode {
    /// Write the identifier accepted back by `FromStr`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        use self::TrapCode::*;
        // User codes carry a numeric payload and are formatted separately.
        if let User(x) = *self {
            return write!(f, "user{}", x);
        }
        f.write_str(match *self {
            StackOverflow => "stk_ovf",
            HeapOutOfBounds => "heap_oob",
            OutOfBounds => "oob",
            IndirectCallToNull => "icall_null",
            BadSignature => "bad_sig",
            IntegerOverflow => "int_ovf",
            IntegerDivisionByZero => "int_divz",
            BadConversionToInteger => "bad_toint",
            Interrupt => "interrupt",
            User(_) => unreachable!(),
        })
    }
}

impl FromStr for TrapCode {
    type Err = ();

    /// Parse the identifiers produced by `Display`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use self::TrapCode::*;
        match s {
            "stk_ovf" => Ok(StackOverflow),
            "heap_oob" => Ok(HeapOutOfBounds),
            "oob" => Ok(OutOfBounds),
            "icall_null" => Ok(IndirectCallToNull),
            "bad_sig" => Ok(BadSignature),
            "int_ovf" => Ok(IntegerOverflow),
            "int_divz" => Ok(IntegerDivisionByZero),
            "bad_toint" => Ok(BadConversionToInteger),
            "interrupt" => Ok(Interrupt),
            _ => {
                // `userNNN` parses the numeric payload; anything else is an error.
                if s.starts_with("user") {
                    s[4..].parse().map(User).map_err(|_| ())
                } else {
                    Err(())
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    // Everything but user-defined codes.
    const CODES: [TrapCode; 8] = [
        TrapCode::StackOverflow,
        TrapCode::HeapOutOfBounds,
        TrapCode::OutOfBounds,
        TrapCode::IndirectCallToNull,
        TrapCode::BadSignature,
        TrapCode::IntegerOverflow,
        TrapCode::IntegerDivisionByZero,
        TrapCode::BadConversionToInteger,
    ];

    #[test]
    fn display() {
        // Every non-user code round-trips through Display and FromStr.
        for code in CODES.iter() {
            assert_eq!(code.to_string().parse(), Ok(*code));
        }
        assert_eq!("bogus".parse::<TrapCode>(), Err(()));

        // User codes round-trip too, but malformed payloads must fail.
        assert_eq!(TrapCode::User(17).to_string(), "user17");
        assert_eq!("user22".parse(), Ok(TrapCode::User(22)));
        assert_eq!("user".parse::<TrapCode>(), Err(()));
        assert_eq!("user-1".parse::<TrapCode>(), Err(()));
        assert_eq!("users".parse::<TrapCode>(), Err(()));
    }
}
456
lib/codegen/src/ir/types.rs
Normal file
456
lib/codegen/src/ir/types.rs
Normal file
@@ -0,0 +1,456 @@
|
||||
//! Common types for the Cretonne code generator.
|
||||
|
||||
use std::default::Default;
|
||||
use std::fmt::{self, Debug, Display, Formatter};
|
||||
|
||||
/// The type of an SSA value.
///
/// The `VOID` type is only used for instructions that produce no value. It can't be part of a SIMD
/// vector.
///
/// Basic integer types: `I8`, `I16`, `I32`, and `I64`. These types are sign-agnostic.
///
/// Basic floating point types: `F32` and `F64`. IEEE single and double precision.
///
/// Boolean types: `B1`, `B8`, `B16`, `B32`, and `B64`. These all encode 'true' or 'false'. The
/// larger types use redundant bits.
///
/// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type.
///
/// Encoded as a single byte: the low nibble selects the lane type, and the high nibble (offset by
/// `LANE_BASE`) encodes log2 of the lane count.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Type(u8);

/// No type. Used for functions without a return value. Can't be loaded or stored. Can't be part of
/// a SIMD vector.
pub const VOID: Type = Type(0);

/// Start of the lane types. See also `meta/cdsl.types.py`.
const LANE_BASE: u8 = 0x70;

/// Start of the 2-lane vector types.
const VECTOR_BASE: u8 = LANE_BASE + 16;

// Include code generated by `lib/codegen/meta/gen_types.py`. This file contains constant
// definitions for all the scalar types as well as common vector types for 64, 128, 256, and
// 512-bit SIMD vectors.
include!(concat!(env!("OUT_DIR"), "/types.rs"));
impl Type {
|
||||
/// Get the lane type of this SIMD vector type.
|
||||
///
|
||||
/// A lane type is the same as a SIMD vector type with one lane, so it returns itself.
|
||||
pub fn lane_type(self) -> Type {
|
||||
if self.0 < VECTOR_BASE {
|
||||
self
|
||||
} else {
|
||||
Type(LANE_BASE | (self.0 & 0x0f))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get log_2 of the number of bits in a lane.
|
||||
pub fn log2_lane_bits(self) -> u8 {
|
||||
match self.lane_type() {
|
||||
B1 => 0,
|
||||
B8 | I8 => 3,
|
||||
B16 | I16 => 4,
|
||||
B32 | I32 | F32 => 5,
|
||||
B64 | I64 | F64 => 6,
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of bits in a lane.
|
||||
pub fn lane_bits(self) -> u8 {
|
||||
match self.lane_type() {
|
||||
B1 => 1,
|
||||
B8 | I8 => 8,
|
||||
B16 | I16 => 16,
|
||||
B32 | I32 | F32 => 32,
|
||||
B64 | I64 | F64 => 64,
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get an integer type with the requested number of bits.
|
||||
pub fn int(bits: u16) -> Option<Type> {
|
||||
match bits {
|
||||
8 => Some(I8),
|
||||
16 => Some(I16),
|
||||
32 => Some(I32),
|
||||
64 => Some(I64),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as `self`, but using `lane` as the lane type.
|
||||
fn replace_lanes(self, lane: Type) -> Type {
|
||||
debug_assert!(lane.is_lane() && !self.is_special());
|
||||
Type((lane.0 & 0x0f) | (self.0 & 0xf0))
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as this type, but with the lanes replaced by
|
||||
/// booleans of the same size.
|
||||
///
|
||||
/// Scalar types are treated as vectors with one lane, so they are converted to the multi-bit
|
||||
/// boolean types.
|
||||
pub fn as_bool_pedantic(self) -> Type {
|
||||
// Replace the low 4 bits with the boolean version, preserve the high 4 bits.
|
||||
self.replace_lanes(match self.lane_type() {
|
||||
B8 | I8 => B8,
|
||||
B16 | I16 => B16,
|
||||
B32 | I32 | F32 => B32,
|
||||
B64 | I64 | F64 => B64,
|
||||
_ => B1,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as this type, but with the lanes replaced by
|
||||
/// booleans of the same size.
|
||||
///
|
||||
/// Scalar types are all converted to `b1` which is usually what you want.
|
||||
pub fn as_bool(self) -> Type {
|
||||
if !self.is_vector() {
|
||||
B1
|
||||
} else {
|
||||
self.as_bool_pedantic()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as this type, but with lanes that are half the
|
||||
/// number of bits.
|
||||
pub fn half_width(self) -> Option<Type> {
|
||||
Some(self.replace_lanes(match self.lane_type() {
|
||||
I16 => I8,
|
||||
I32 => I16,
|
||||
I64 => I32,
|
||||
F64 => F32,
|
||||
B16 => B8,
|
||||
B32 => B16,
|
||||
B64 => B32,
|
||||
_ => return None,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as this type, but with lanes that are twice the
|
||||
/// number of bits.
|
||||
pub fn double_width(self) -> Option<Type> {
|
||||
Some(self.replace_lanes(match self.lane_type() {
|
||||
I8 => I16,
|
||||
I16 => I32,
|
||||
I32 => I64,
|
||||
F32 => F64,
|
||||
B8 => B16,
|
||||
B16 => B32,
|
||||
B32 => B64,
|
||||
_ => return None,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Is this the VOID type?
|
||||
pub fn is_void(self) -> bool {
|
||||
self == VOID
|
||||
}
|
||||
|
||||
/// Is this a special type?
|
||||
pub fn is_special(self) -> bool {
|
||||
self.0 < LANE_BASE
|
||||
}
|
||||
|
||||
/// Is this a lane type?
|
||||
///
|
||||
/// This is a scalar type that can also appear as the lane type of a SIMD vector.
|
||||
pub fn is_lane(self) -> bool {
|
||||
LANE_BASE <= self.0 && self.0 < VECTOR_BASE
|
||||
}
|
||||
|
||||
/// Is this a SIMD vector type?
|
||||
///
|
||||
/// A vector type has 2 or more lanes.
|
||||
pub fn is_vector(self) -> bool {
|
||||
self.0 >= VECTOR_BASE
|
||||
}
|
||||
|
||||
/// Is this a scalar boolean type?
|
||||
pub fn is_bool(self) -> bool {
|
||||
match self {
|
||||
B1 | B8 | B16 | B32 | B64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a scalar integer type?
|
||||
pub fn is_int(self) -> bool {
|
||||
match self {
|
||||
I8 | I16 | I32 | I64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a scalar floating point type?
|
||||
pub fn is_float(self) -> bool {
|
||||
match self {
|
||||
F32 | F64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a CPU flags type?
|
||||
pub fn is_flags(self) -> bool {
|
||||
match self {
|
||||
IFLAGS | FFLAGS => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get log_2 of the number of lanes in this SIMD vector type.
|
||||
///
|
||||
/// All SIMD types have a lane count that is a power of two and no larger than 256, so this
|
||||
/// will be a number in the range 0-8.
|
||||
///
|
||||
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 0.
|
||||
pub fn log2_lane_count(self) -> u8 {
|
||||
self.0.saturating_sub(LANE_BASE) >> 4
|
||||
}
|
||||
|
||||
/// Get the number of lanes in this SIMD vector type.
|
||||
///
|
||||
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 1.
|
||||
pub fn lane_count(self) -> u16 {
|
||||
1 << self.log2_lane_count()
|
||||
}
|
||||
|
||||
/// Get the total number of bits used to represent this type.
|
||||
pub fn bits(self) -> u16 {
|
||||
u16::from(self.lane_bits()) * self.lane_count()
|
||||
}
|
||||
|
||||
/// Get the number of bytes used to store this type in memory.
|
||||
pub fn bytes(self) -> u32 {
|
||||
(u32::from(self.bits()) + 7) / 8
|
||||
}
|
||||
|
||||
/// Get a SIMD vector type with `n` times more lanes than this one.
|
||||
///
|
||||
/// If this is a scalar type, this produces a SIMD type with this as a lane type and `n` lanes.
|
||||
///
|
||||
/// If this is already a SIMD vector type, this produces a SIMD vector type with `n *
|
||||
/// self.lane_count()` lanes.
|
||||
pub fn by(self, n: u16) -> Option<Type> {
|
||||
if self.lane_bits() == 0 || !n.is_power_of_two() {
|
||||
return None;
|
||||
}
|
||||
let log2_lanes: u32 = n.trailing_zeros();
|
||||
let new_type = u32::from(self.0) + (log2_lanes << 4);
|
||||
if new_type < 0x100 {
|
||||
Some(Type(new_type as u8))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a SIMD vector with half the number of lanes.
|
||||
///
|
||||
/// There is no `double_vector()` method. Use `t.by(2)` instead.
|
||||
pub fn half_vector(self) -> Option<Type> {
|
||||
if self.is_vector() {
|
||||
Some(Type(self.0 - 0x10))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Index of this type, for use with hash tables etc.
|
||||
pub fn index(self) -> usize {
|
||||
usize::from(self.0)
|
||||
}
|
||||
|
||||
/// True iff:
|
||||
///
|
||||
/// 1. `self.lane_count() == other.lane_count()` and
|
||||
/// 2. `self.lane_bits() >= other.lane_bits()`
|
||||
pub fn wider_or_equal(self, other: Type) -> bool {
|
||||
self.lane_count() == other.lane_count() && self.lane_bits() >= other.lane_bits()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Type {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
if self.is_bool() {
|
||||
write!(f, "b{}", self.lane_bits())
|
||||
} else if self.is_int() {
|
||||
write!(f, "i{}", self.lane_bits())
|
||||
} else if self.is_float() {
|
||||
write!(f, "f{}", self.lane_bits())
|
||||
} else if self.is_vector() {
|
||||
write!(f, "{}x{}", self.lane_type(), self.lane_count())
|
||||
} else {
|
||||
f.write_str(match *self {
|
||||
VOID => "void",
|
||||
IFLAGS => "iflags",
|
||||
FFLAGS => "fflags",
|
||||
_ => panic!("Invalid Type(0x{:x})", self.0),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Type {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
if self.is_bool() {
|
||||
write!(f, "types::B{}", self.lane_bits())
|
||||
} else if self.is_int() {
|
||||
write!(f, "types::I{}", self.lane_bits())
|
||||
} else if self.is_float() {
|
||||
write!(f, "types::F{}", self.lane_bits())
|
||||
} else if self.is_vector() {
|
||||
write!(f, "{:?}X{}", self.lane_type(), self.lane_count())
|
||||
} else {
|
||||
match *self {
|
||||
VOID => write!(f, "types::VOID"),
|
||||
IFLAGS => write!(f, "types::IFLAGS"),
|
||||
FFLAGS => write!(f, "types::FFLAGS"),
|
||||
_ => write!(f, "Type(0x{:x})", self.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Type {
    /// The default type is `VOID` (`Type(0)`).
    fn default() -> Self {
        VOID
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    #[test]
    fn basic_scalars() {
        // Every scalar is its own lane type.
        let scalars = [
            VOID, IFLAGS, FFLAGS, B1, B8, B16, B32, B64, I8, I16, I32, I64, F32, F64,
        ];
        for &t in &scalars {
            assert_eq!(t, t.lane_type());
        }

        // Special types occupy no bits.
        assert_eq!(VOID.bits(), 0);
        assert_eq!(IFLAGS.bits(), 0);
        assert_eq!(FFLAGS.bits(), 0);

        // Expected lane widths for every scalar.
        let widths = [
            (VOID, 0),
            (IFLAGS, 0),
            (FFLAGS, 0),
            (B1, 1),
            (B8, 8),
            (B16, 16),
            (B32, 32),
            (B64, 64),
            (I8, 8),
            (I16, 16),
            (I32, 32),
            (I64, 64),
            (F32, 32),
            (F64, 64),
        ];
        for &(t, bits) in &widths {
            assert_eq!(t.lane_bits(), bits);
        }
    }

    #[test]
    fn typevar_functions() {
        // (type, expected half_width, expected double_width)
        let table = [
            (VOID, None, None),
            (IFLAGS, None, None),
            (FFLAGS, None, None),
            (B1, None, None),
            (B8, None, Some(B16)),
            (B16, Some(B8), Some(B32)),
            (B32, Some(B16), Some(B64)),
            (B64, Some(B32), None),
            (I8, None, Some(I16)),
            (I16, Some(I8), Some(I32)),
            (I32, Some(I16), Some(I64)),
            (I32X4, Some(I16X4), Some(I64X4)),
            (I64, Some(I32), None),
            (F32, None, Some(F64)),
            (F64, Some(F32), None),
        ];
        for &(t, half, double) in &table {
            assert_eq!(t.half_width(), half);
            assert_eq!(t.double_width(), double);
        }
    }

    #[test]
    fn vectors() {
        let big = F64.by(256).unwrap();
        assert_eq!(big.lane_bits(), 64);
        assert_eq!(big.lane_count(), 256);
        assert_eq!(big.bits(), 64 * 256);

        assert_eq!(big.half_vector().unwrap().to_string(), "f64x128");
        assert_eq!(B1.by(2).unwrap().half_vector().unwrap().to_string(), "b1");
        assert_eq!(I32.half_vector(), None);
        assert_eq!(VOID.half_vector(), None);

        // Check that the generated constants match the computed vector types.
        assert_eq!(I32.by(4), Some(I32X4));
        assert_eq!(F64.by(8), Some(F64X8));
    }

    #[test]
    fn format_scalars() {
        let expected = [
            (VOID, "void"),
            (IFLAGS, "iflags"),
            (FFLAGS, "fflags"),
            (B1, "b1"),
            (B8, "b8"),
            (B16, "b16"),
            (B32, "b32"),
            (B64, "b64"),
            (I8, "i8"),
            (I16, "i16"),
            (I32, "i32"),
            (I64, "i64"),
            (F32, "f32"),
            (F64, "f64"),
        ];
        for &(t, text) in &expected {
            assert_eq!(t.to_string(), text);
        }
    }

    #[test]
    fn format_vectors() {
        assert_eq!(B1.by(8).unwrap().to_string(), "b1x8");
        assert_eq!(B8.by(1).unwrap().to_string(), "b8");
        assert_eq!(B16.by(256).unwrap().to_string(), "b16x256");
        assert_eq!(B32.by(4).unwrap().by(2).unwrap().to_string(), "b32x8");
        assert_eq!(B64.by(8).unwrap().to_string(), "b64x8");
        assert_eq!(I8.by(64).unwrap().to_string(), "i8x64");
        assert_eq!(F64.by(2).unwrap().to_string(), "f64x2");

        // Non-power-of-two or oversized lane counts produce no type; void has no lanes.
        assert_eq!(I8.by(3), None);
        assert_eq!(I8.by(512), None);
        assert_eq!(VOID.by(4), None);
    }

    #[test]
    fn as_bool() {
        assert_eq!(I32X4.as_bool(), B32X4);
        assert_eq!(I32.as_bool(), B1);
        assert_eq!(I32X4.as_bool_pedantic(), B32X4);
        assert_eq!(I32.as_bool_pedantic(), B32);
    }
}
165
lib/codegen/src/ir/valueloc.rs
Normal file
165
lib/codegen/src/ir/valueloc.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
//! Value locations.
|
||||
//!
|
||||
//! The register allocator assigns every SSA value to either a register or a stack slot. This
|
||||
//! assignment is represented by a `ValueLoc` object.
|
||||
|
||||
use ir::StackSlot;
|
||||
use isa::{RegInfo, RegUnit};
|
||||
use std::fmt;
|
||||
|
||||
/// Value location.
///
/// Where the register allocator has placed an SSA value: nowhere yet, in a register unit,
/// or in a stack slot.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ValueLoc {
    /// This value has not been assigned to a location yet.
    Unassigned,
    /// Value is assigned to a register.
    Reg(RegUnit),
    /// Value is assigned to a stack slot.
    Stack(StackSlot),
}
impl Default for ValueLoc {
|
||||
fn default() -> Self {
|
||||
ValueLoc::Unassigned
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueLoc {
|
||||
/// Is this an assigned location? (That is, not `Unassigned`).
|
||||
pub fn is_assigned(&self) -> bool {
|
||||
match *self {
|
||||
ValueLoc::Unassigned => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the register unit of this location, or panic.
|
||||
pub fn unwrap_reg(self) -> RegUnit {
|
||||
match self {
|
||||
ValueLoc::Reg(ru) => ru,
|
||||
_ => panic!("Expected register: {:?}", self),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the stack slot of this location, or panic.
|
||||
pub fn unwrap_stack(self) -> StackSlot {
|
||||
match self {
|
||||
ValueLoc::Stack(ss) => ss,
|
||||
_ => panic!("Expected stack slot: {:?}", self),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this value location, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayValueLoc<'a> {
|
||||
DisplayValueLoc(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Displaying a `ValueLoc` correctly requires the associated `RegInfo` from the target ISA.
|
||||
/// Without the register info, register units are simply show as numbers.
|
||||
///
|
||||
/// The `DisplayValueLoc` type can display the contained `ValueLoc`.
|
||||
pub struct DisplayValueLoc<'a>(ValueLoc, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayValueLoc<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0 {
|
||||
ValueLoc::Unassigned => write!(f, "-"),
|
||||
ValueLoc::Reg(ru) => {
|
||||
match self.1 {
|
||||
Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
|
||||
None => write!(f, "%{}", ru),
|
||||
}
|
||||
}
|
||||
ValueLoc::Stack(ss) => write!(f, "{}", ss),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Function argument location.
|
||||
///
|
||||
/// The ABI specifies how arguments are passed to a function, and where return values appear after
|
||||
/// the call. Just like a `ValueLoc`, function arguments can be passed in registers or on the
|
||||
/// stack.
|
||||
///
|
||||
/// Function arguments on the stack are accessed differently for the incoming arguments to the
|
||||
/// current function and the outgoing arguments to a called external function. For this reason,
|
||||
/// the location of stack arguments is described as an offset into the array of function arguments
|
||||
/// on the stack.
|
||||
///
|
||||
/// An `ArgumentLoc` can be translated to a `ValueLoc` only when we know if we're talking about an
|
||||
/// incoming argument or an outgoing argument.
|
||||
///
|
||||
/// - For stack arguments, different `StackSlot` entities are used to represent incoming and
|
||||
/// outgoing arguments.
|
||||
/// - For register arguments, there is usually no difference, but if we ever add support for a
|
||||
/// register-window ISA like SPARC, register arguments would also need to be translated.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub enum ArgumentLoc {
|
||||
/// This argument has not been assigned to a location yet.
|
||||
Unassigned,
|
||||
/// Argument is passed in a register.
|
||||
Reg(RegUnit),
|
||||
/// Argument is passed on the stack, at the given byte offset into the argument array.
|
||||
Stack(i32),
|
||||
}
|
||||
|
||||
impl Default for ArgumentLoc {
|
||||
fn default() -> Self {
|
||||
ArgumentLoc::Unassigned
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgumentLoc {
|
||||
/// Is this an assigned location? (That is, not `Unassigned`).
|
||||
pub fn is_assigned(&self) -> bool {
|
||||
match *self {
|
||||
ArgumentLoc::Unassigned => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a register location?
|
||||
pub fn is_reg(&self) -> bool {
|
||||
match *self {
|
||||
ArgumentLoc::Reg(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a stack location?
|
||||
pub fn is_stack(&self) -> bool {
|
||||
match *self {
|
||||
ArgumentLoc::Stack(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this argument location, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayArgumentLoc<'a> {
|
||||
DisplayArgumentLoc(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Displaying a `ArgumentLoc` correctly requires the associated `RegInfo` from the target ISA.
|
||||
/// Without the register info, register units are simply show as numbers.
|
||||
///
|
||||
/// The `DisplayArgumentLoc` type can display the contained `ArgumentLoc`.
|
||||
pub struct DisplayArgumentLoc<'a>(ArgumentLoc, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayArgumentLoc<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0 {
|
||||
ArgumentLoc::Unassigned => write!(f, "-"),
|
||||
ArgumentLoc::Reg(ru) => {
|
||||
match self.1 {
|
||||
Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
|
||||
None => write!(f, "%{}", ru),
|
||||
}
|
||||
}
|
||||
ArgumentLoc::Stack(offset) => write!(f, "{}", offset),
|
||||
}
|
||||
}
|
||||
}
|
||||
35
lib/codegen/src/isa/arm32/abi.rs
Normal file
35
lib/codegen/src/isa/arm32/abi.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
//! ARM ABI implementation.
|
||||
|
||||
use super::registers::{D, GPR, Q, S};
|
||||
use ir;
|
||||
use isa::RegClass;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
|
||||
/// Legalize `sig`.
|
||||
pub fn legalize_signature(
|
||||
_sig: &mut ir::Signature,
|
||||
_flags: &shared_settings::Flags,
|
||||
_current: bool,
|
||||
) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
if ty.is_int() {
|
||||
GPR
|
||||
} else {
|
||||
match ty.bits() {
|
||||
32 => S,
|
||||
64 => D,
|
||||
128 => Q,
|
||||
_ => panic!("Unexpected {} ABI type for arm32", ty),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
|
||||
unimplemented!()
|
||||
}
|
||||
7
lib/codegen/src/isa/arm32/binemit.rs
Normal file
7
lib/codegen/src/isa/arm32/binemit.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
//! Emitting binary ARM32 machine code.
|
||||
|
||||
use binemit::{bad_encoding, CodeSink};
|
||||
use ir::{Function, Inst};
|
||||
use regalloc::RegDiversions;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-arm32.rs"));
|
||||
10
lib/codegen/src/isa/arm32/enc_tables.rs
Normal file
10
lib/codegen/src/isa/arm32/enc_tables.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
//! Encoding tables for ARM32 ISA.
|
||||
|
||||
use ir;
|
||||
use isa;
|
||||
use isa::constraints::*;
|
||||
use isa::enc_tables::*;
|
||||
use isa::encoding::RecipeSizing;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-arm32.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-arm32.rs"));
|
||||
118
lib/codegen/src/isa/arm32/mod.rs
Normal file
118
lib/codegen/src/isa/arm32/mod.rs
Normal file
@@ -0,0 +1,118 @@
|
||||
//! ARM 32-bit Instruction Set Architecture.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
use binemit::{emit_function, CodeSink, MemoryCodeSink};
|
||||
use ir;
|
||||
use isa::Builder as IsaBuilder;
|
||||
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
|
||||
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use regalloc;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
|
||||
#[allow(dead_code)]
|
||||
struct Isa {
|
||||
shared_flags: shared_settings::Flags,
|
||||
isa_flags: settings::Flags,
|
||||
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
|
||||
}
|
||||
|
||||
/// Get an ISA builder for creating ARM32 targets.
|
||||
pub fn isa_builder() -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
setup: settings::builder(),
|
||||
constructor: isa_constructor,
|
||||
}
|
||||
}
|
||||
|
||||
fn isa_constructor(
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: &shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
let level1 = if shared_flags.is_compressed() {
|
||||
&enc_tables::LEVEL1_T32[..]
|
||||
} else {
|
||||
&enc_tables::LEVEL1_A32[..]
|
||||
};
|
||||
Box::new(Isa {
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
fn name(&self) -> &'static str {
|
||||
"arm32"
|
||||
}
|
||||
|
||||
fn flags(&self) -> &shared_settings::Flags {
|
||||
&self.shared_flags
|
||||
}
|
||||
|
||||
fn register_info(&self) -> RegInfo {
|
||||
registers::INFO.clone()
|
||||
}
|
||||
|
||||
fn encoding_info(&self) -> EncInfo {
|
||||
enc_tables::INFO.clone()
|
||||
}
|
||||
|
||||
fn legal_encodings<'a>(
|
||||
&'a self,
|
||||
func: &'a ir::Function,
|
||||
inst: &'a ir::InstructionData,
|
||||
ctrl_typevar: ir::Type,
|
||||
) -> Encodings<'a> {
|
||||
lookup_enclist(
|
||||
ctrl_typevar,
|
||||
inst,
|
||||
func,
|
||||
self.cpumode,
|
||||
&enc_tables::LEVEL2[..],
|
||||
&enc_tables::ENCLISTS[..],
|
||||
&enc_tables::LEGALIZE_ACTIONS[..],
|
||||
&enc_tables::RECIPE_PREDICATES[..],
|
||||
&enc_tables::INST_PREDICATES[..],
|
||||
self.isa_flags.predicate_view(),
|
||||
)
|
||||
}
|
||||
|
||||
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
|
||||
abi::legalize_signature(sig, &self.shared_flags, current)
|
||||
}
|
||||
|
||||
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func)
|
||||
}
|
||||
|
||||
fn emit_inst(
|
||||
&self,
|
||||
func: &ir::Function,
|
||||
inst: ir::Inst,
|
||||
divert: &mut regalloc::RegDiversions,
|
||||
sink: &mut CodeSink,
|
||||
) {
|
||||
binemit::emit_inst(func, inst, divert, sink)
|
||||
}
|
||||
|
||||
fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
|
||||
emit_function(func, binemit::emit_inst, sink)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
68
lib/codegen/src/isa/arm32/registers.rs
Normal file
68
lib/codegen/src/isa/arm32/registers.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
//! ARM32 register descriptions.
|
||||
|
||||
use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{D, GPR, INFO, S};
    use isa::RegUnit;
    use std::string::{String, ToString};

    #[test]
    fn unit_encodings() {
        // Register names parse to their flat register-unit numbers.
        let cases: [(&str, RegUnit); 5] =
            [("s0", 0), ("s31", 31), ("s32", 32), ("r0", 64), ("r15", 79)];
        for &(name, unit) in &cases {
            assert_eq!(INFO.parse_regunit(name), Some(unit));
        }
    }

    #[test]
    fn unit_names() {
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%s0");
        assert_eq!(uname(1), "%s1");
        assert_eq!(uname(31), "%s31");
        assert_eq!(uname(64), "%r0");
    }

    #[test]
    fn overlaps() {
        // arm32 has the most interesting register geometries, so test `regs_overlap()` here.
        use isa::regs_overlap;

        // GPRs only overlap themselves.
        let r0 = GPR.unit(0);
        let r1 = GPR.unit(1);
        let r2 = GPR.unit(2);

        assert!(regs_overlap(GPR, r0, GPR, r0));
        assert!(regs_overlap(GPR, r2, GPR, r2));
        assert!(!regs_overlap(GPR, r0, GPR, r1));
        assert!(!regs_overlap(GPR, r1, GPR, r0));
        assert!(!regs_overlap(GPR, r2, GPR, r1));
        assert!(!regs_overlap(GPR, r1, GPR, r2));

        // Each D register aliases a pair of S registers.
        let s0 = S.unit(0);
        let s1 = S.unit(1);
        let s2 = S.unit(2);
        let s3 = S.unit(3);
        let d0 = D.unit(0);
        let d1 = D.unit(1);

        assert!(regs_overlap(S, s0, D, d0));
        assert!(regs_overlap(S, s1, D, d0));
        assert!(!regs_overlap(S, s0, D, d1));
        assert!(!regs_overlap(S, s1, D, d1));
        assert!(regs_overlap(S, s2, D, d1));
        assert!(regs_overlap(S, s3, D, d1));
        assert!(!regs_overlap(D, d1, S, s1));
        assert!(regs_overlap(D, d1, S, s2));
        assert!(!regs_overlap(D, d0, D, d1));
        assert!(regs_overlap(D, d1, D, d1));
    }
}
|
||||
9
lib/codegen/src/isa/arm32/settings.rs
Normal file
9
lib/codegen/src/isa/arm32/settings.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
//! ARM32 Settings.
|
||||
|
||||
use settings::{self, detail, Builder};
|
||||
use std::fmt;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `lib/codegen/meta/isa/arm32/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs"));
|
||||
26
lib/codegen/src/isa/arm64/abi.rs
Normal file
26
lib/codegen/src/isa/arm64/abi.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
//! ARM 64 ABI implementation.
|
||||
|
||||
use super::registers::{FPR, GPR};
|
||||
use ir;
|
||||
use isa::RegClass;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
|
||||
/// Legalize `sig`.
|
||||
pub fn legalize_signature(
|
||||
_sig: &mut ir::Signature,
|
||||
_flags: &shared_settings::Flags,
|
||||
_current: bool,
|
||||
) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
if ty.is_int() { GPR } else { FPR }
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
|
||||
unimplemented!()
|
||||
}
|
||||
7
lib/codegen/src/isa/arm64/binemit.rs
Normal file
7
lib/codegen/src/isa/arm64/binemit.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
//! Emitting binary ARM64 machine code.
|
||||
|
||||
use binemit::{bad_encoding, CodeSink};
|
||||
use ir::{Function, Inst};
|
||||
use regalloc::RegDiversions;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs"));
|
||||
10
lib/codegen/src/isa/arm64/enc_tables.rs
Normal file
10
lib/codegen/src/isa/arm64/enc_tables.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
//! Encoding tables for ARM64 ISA.
|
||||
|
||||
use ir;
|
||||
use isa;
|
||||
use isa::constraints::*;
|
||||
use isa::enc_tables::*;
|
||||
use isa::encoding::RecipeSizing;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-arm64.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-arm64.rs"));
|
||||
111
lib/codegen/src/isa/arm64/mod.rs
Normal file
111
lib/codegen/src/isa/arm64/mod.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! ARM 64-bit Instruction Set Architecture.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
use binemit::{emit_function, CodeSink, MemoryCodeSink};
|
||||
use ir;
|
||||
use isa::Builder as IsaBuilder;
|
||||
use isa::enc_tables::{lookup_enclist, Encodings};
|
||||
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use regalloc;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
|
||||
#[allow(dead_code)]
|
||||
struct Isa {
|
||||
shared_flags: shared_settings::Flags,
|
||||
isa_flags: settings::Flags,
|
||||
}
|
||||
|
||||
/// Get an ISA builder for creating ARM64 targets.
|
||||
pub fn isa_builder() -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
setup: settings::builder(),
|
||||
constructor: isa_constructor,
|
||||
}
|
||||
}
|
||||
|
||||
fn isa_constructor(
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: &shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
Box::new(Isa {
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
fn name(&self) -> &'static str {
|
||||
"arm64"
|
||||
}
|
||||
|
||||
fn flags(&self) -> &shared_settings::Flags {
|
||||
&self.shared_flags
|
||||
}
|
||||
|
||||
fn register_info(&self) -> RegInfo {
|
||||
registers::INFO.clone()
|
||||
}
|
||||
|
||||
fn encoding_info(&self) -> EncInfo {
|
||||
enc_tables::INFO.clone()
|
||||
}
|
||||
|
||||
fn legal_encodings<'a>(
|
||||
&'a self,
|
||||
func: &'a ir::Function,
|
||||
inst: &'a ir::InstructionData,
|
||||
ctrl_typevar: ir::Type,
|
||||
) -> Encodings<'a> {
|
||||
lookup_enclist(
|
||||
ctrl_typevar,
|
||||
inst,
|
||||
func,
|
||||
&enc_tables::LEVEL1_A64[..],
|
||||
&enc_tables::LEVEL2[..],
|
||||
&enc_tables::ENCLISTS[..],
|
||||
&enc_tables::LEGALIZE_ACTIONS[..],
|
||||
&enc_tables::RECIPE_PREDICATES[..],
|
||||
&enc_tables::INST_PREDICATES[..],
|
||||
self.isa_flags.predicate_view(),
|
||||
)
|
||||
}
|
||||
|
||||
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
|
||||
abi::legalize_signature(sig, &self.shared_flags, current)
|
||||
}
|
||||
|
||||
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func)
|
||||
}
|
||||
|
||||
fn emit_inst(
|
||||
&self,
|
||||
func: &ir::Function,
|
||||
inst: ir::Inst,
|
||||
divert: &mut regalloc::RegDiversions,
|
||||
sink: &mut CodeSink,
|
||||
) {
|
||||
binemit::emit_inst(func, inst, divert, sink)
|
||||
}
|
||||
|
||||
fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
|
||||
emit_function(func, binemit::emit_inst, sink)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
39
lib/codegen/src/isa/arm64/registers.rs
Normal file
39
lib/codegen/src/isa/arm64/registers.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
//! ARM64 register descriptions.
|
||||
|
||||
use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-arm64.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::INFO;
    use isa::RegUnit;
    use std::string::{String, ToString};

    #[test]
    fn unit_encodings() {
        // Valid register names parse to their flat register-unit numbers.
        let valid: [(&str, RegUnit); 4] = [("x0", 0), ("x31", 31), ("v0", 32), ("v31", 63)];
        for &(name, unit) in &valid {
            assert_eq!(INFO.parse_regunit(name), Some(unit));
        }

        // Out-of-range indices are rejected.
        assert_eq!(INFO.parse_regunit("x32"), None);
        assert_eq!(INFO.parse_regunit("v32"), None);
    }

    #[test]
    fn unit_names() {
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%x0");
        assert_eq!(uname(1), "%x1");
        assert_eq!(uname(31), "%x31");
        assert_eq!(uname(32), "%v0");
        assert_eq!(uname(33), "%v1");
        assert_eq!(uname(63), "%v31");
        // Unit 64 is the flags register; anything past it is invalid.
        assert_eq!(uname(64), "%nzcv");
        assert_eq!(uname(65), "%INVALID65");
    }
}
|
||||
9
lib/codegen/src/isa/arm64/settings.rs
Normal file
9
lib/codegen/src/isa/arm64/settings.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
//! ARM64 Settings.
|
||||
|
||||
use settings::{self, detail, Builder};
|
||||
use std::fmt;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `lib/codegen/meta/isa/arm64/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));
|
||||
209
lib/codegen/src/isa/constraints.rs
Normal file
209
lib/codegen/src/isa/constraints.rs
Normal file
@@ -0,0 +1,209 @@
|
||||
//! Register constraints for instruction operands.
|
||||
//!
|
||||
//! An encoding recipe specifies how an instruction is encoded as binary machine code, but it only
|
||||
//! works if the operands and results satisfy certain constraints. Constraints on immediate
|
||||
//! operands are checked by instruction predicates when the recipe is chosen.
|
||||
//!
|
||||
//! It is the register allocator's job to make sure that the register constraints on value operands
|
||||
//! are satisfied.
|
||||
|
||||
use binemit::CodeOffset;
|
||||
use ir::{Function, Inst, ValueLoc};
|
||||
use isa::{RegClass, RegUnit};
|
||||
use regalloc::RegDiversions;
|
||||
|
||||
/// Register constraint for a single value operand or instruction result.
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub struct OperandConstraint {
|
||||
/// The kind of constraint.
|
||||
pub kind: ConstraintKind,
|
||||
|
||||
/// The register class of the operand.
|
||||
///
|
||||
/// This applies to all kinds of constraints, but with slightly different meaning.
|
||||
pub regclass: RegClass,
|
||||
}
|
||||
|
||||
impl OperandConstraint {
|
||||
/// Check if this operand constraint is satisfied by the given value location.
|
||||
/// For tied constraints, this only checks the register class, not that the
|
||||
/// counterpart operand has the same value location.
|
||||
pub fn satisfied(&self, loc: ValueLoc) -> bool {
|
||||
match self.kind {
|
||||
ConstraintKind::Reg |
|
||||
ConstraintKind::Tied(_) => {
|
||||
if let ValueLoc::Reg(reg) = loc {
|
||||
self.regclass.contains(reg)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
ConstraintKind::FixedReg(reg) |
|
||||
ConstraintKind::FixedTied(reg) => {
|
||||
loc == ValueLoc::Reg(reg) && self.regclass.contains(reg)
|
||||
}
|
||||
ConstraintKind::Stack => {
|
||||
if let ValueLoc::Stack(_) = loc {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The different kinds of operand constraints.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
|
||||
pub enum ConstraintKind {
|
||||
/// This operand or result must be a register from the given register class.
|
||||
Reg,
|
||||
|
||||
/// This operand or result must be a fixed register.
|
||||
///
|
||||
/// The constraint's `regclass` field is the top-level register class containing the fixed
|
||||
/// register.
|
||||
FixedReg(RegUnit),
|
||||
|
||||
/// This result value must use the same register as an input value operand.
|
||||
///
|
||||
/// The associated number is the index of the input value operand this result is tied to. The
|
||||
/// constraint's `regclass` field is the same as the tied operand's register class.
|
||||
///
|
||||
/// When an (in, out) operand pair is tied, this constraint kind appears in both the `ins` and
|
||||
/// the `outs` arrays. The constraint for the in operand is `Tied(out)`, and the constraint for
|
||||
/// the out operand is `Tied(in)`.
|
||||
Tied(u8),
|
||||
|
||||
/// This operand must be a fixed register, and it has a tied counterpart.
|
||||
///
|
||||
/// This works just like `FixedReg`, but additionally indicates that there are identical
|
||||
/// input/output operands for this fixed register. For an input operand, this means that the
|
||||
/// value will be clobbered by the instruction
|
||||
FixedTied(RegUnit),
|
||||
|
||||
/// This operand must be a value in a stack slot.
|
||||
///
|
||||
/// The constraint's `regclass` field is the register class that would normally be used to load
|
||||
/// and store values of this type.
|
||||
Stack,
|
||||
}
|
||||
|
||||
/// Value operand constraints for an encoding recipe.
|
||||
#[derive(PartialEq, Clone)]
|
||||
pub struct RecipeConstraints {
|
||||
/// Constraints for the instruction's fixed value operands.
|
||||
///
|
||||
/// If the instruction takes a variable number of operands, the register constraints for those
|
||||
/// operands must be computed dynamically.
|
||||
///
|
||||
/// - For branches and jumps, EBB arguments must match the expectations of the destination EBB.
|
||||
/// - For calls and returns, the calling convention ABI specifies constraints.
|
||||
pub ins: &'static [OperandConstraint],
|
||||
|
||||
/// Constraints for the instruction's fixed results.
|
||||
///
|
||||
/// If the instruction produces a variable number of results, it's probably a call and the
|
||||
/// constraints must be derived from the calling convention ABI.
|
||||
pub outs: &'static [OperandConstraint],
|
||||
|
||||
/// Are any of the input constraints `FixedReg`?
|
||||
pub fixed_ins: bool,
|
||||
|
||||
/// Are any of the output constraints `FixedReg`?
|
||||
pub fixed_outs: bool,
|
||||
|
||||
/// Are there any tied operands?
|
||||
pub tied_ops: bool,
|
||||
|
||||
/// Does this instruction clobber the CPU flags?
|
||||
///
|
||||
/// When true, SSA values of type `iflags` or `fflags` can not be live across the instruction.
|
||||
pub clobbers_flags: bool,
|
||||
}
|
||||
|
||||
impl RecipeConstraints {
|
||||
/// Check that these constraints are satisfied by the operands on `inst`.
|
||||
pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool {
|
||||
for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) {
|
||||
let loc = divert.get(arg, &func.locations);
|
||||
|
||||
if let ConstraintKind::Tied(out_index) = constraint.kind {
|
||||
let out_val = func.dfg.inst_results(inst)[out_index as usize];
|
||||
let out_loc = func.locations[out_val];
|
||||
if loc != out_loc {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if !constraint.satisfied(loc) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) {
|
||||
let loc = divert.get(arg, &func.locations);
|
||||
if !constraint.satisfied(loc) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Constraints on the range of a branch instruction.
|
||||
///
|
||||
/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin.
|
||||
/// The origin depends on the ISA and the specific instruction:
|
||||
///
|
||||
/// - RISC-V and ARM Aarch64 use the address of the branch instruction, `origin = 0`.
|
||||
/// - x86 uses the address of the instruction following the branch, `origin = 2` for a 2-byte
|
||||
/// branch instruction.
|
||||
/// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct BranchRange {
|
||||
/// Offset in bytes from the address of the branch instruction to the origin used for computing
|
||||
/// the branch displacement. This is the destination of a branch that encodes a 0 displacement.
|
||||
pub origin: u8,
|
||||
|
||||
/// Number of bits in the signed byte displacement encoded in the instruction. This does not
|
||||
/// account for branches that can only target aligned addresses.
|
||||
pub bits: u8,
|
||||
}
|
||||
|
||||
impl BranchRange {
|
||||
/// Determine if this branch range can represent the range from `branch` to `dest`, where
|
||||
/// `branch` is the code offset of the branch instruction itself and `dest` is the code offset
|
||||
/// of the destination EBB header.
|
||||
///
|
||||
/// This method does not detect if the range is larger than 2 GB.
|
||||
pub fn contains(self, branch: CodeOffset, dest: CodeOffset) -> bool {
|
||||
let d = dest.wrapping_sub(branch + CodeOffset::from(self.origin)) as i32;
|
||||
let s = 32 - self.bits;
|
||||
d == d << s >> s
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn branch_range() {
        // ARM T1 branch: origin 4 bytes past the branch, 9-bit signed displacement.
        let t1 = BranchRange { origin: 4, bits: 9 };

        // Short hops in either direction are in range.
        assert!(t1.contains(0, 0));
        assert!(t1.contains(0, 2));
        assert!(t1.contains(2, 0));
        assert!(t1.contains(1000, 1000));

        // Forward limit: the largest representable positive displacement.
        assert!(t1.contains(1000, 1258));
        assert!(!t1.contains(1000, 1260));

        // Backward limit: the most negative representable displacement.
        assert!(t1.contains(1000, 748));
        assert!(!t1.contains(1000, 746));
    }
}
|
||||
292
lib/codegen/src/isa/enc_tables.rs
Normal file
292
lib/codegen/src/isa/enc_tables.rs
Normal file
@@ -0,0 +1,292 @@
|
||||
//! Support types for generated encoding tables.
|
||||
//!
|
||||
//! This module contains types and functions for working with the encoding tables generated by
|
||||
//! `lib/codegen/meta/gen_encoding.py`.
|
||||
|
||||
use constant_hash::{probe, Table};
|
||||
use ir::{Function, InstructionData, Opcode, Type};
|
||||
use isa::{Encoding, Legalize};
|
||||
use settings::PredicateView;
|
||||
use std::ops::Range;
|
||||
|
||||
/// A recipe predicate.
|
||||
///
|
||||
/// This is a predicate function capable of testing ISA and instruction predicates simultaneously.
|
||||
///
|
||||
/// A None predicate is always satisfied.
|
||||
pub type RecipePredicate = Option<fn(PredicateView, &InstructionData) -> bool>;
|
||||
|
||||
/// An instruction predicate.
|
||||
///
|
||||
/// This is a predicate function that needs to be tested in addition to the recipe predicate. It
|
||||
/// can't depend on ISA settings.
|
||||
pub type InstPredicate = fn(&Function, &InstructionData) -> bool;
|
||||
|
||||
/// Legalization action to perform when no encoding can be found for an instruction.
|
||||
///
|
||||
/// This is an index into an ISA-specific table of legalization actions.
|
||||
pub type LegalizeCode = u8;
|
||||
|
||||
/// Level 1 hash table entry.
|
||||
///
|
||||
/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type
|
||||
/// variable, using `VOID` for non-polymorphic instructions.
|
||||
///
|
||||
/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2`
|
||||
/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables
|
||||
/// have a power-of-two size.
|
||||
///
|
||||
/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the
|
||||
/// size of the `LEVEL2` table.
|
||||
///
|
||||
/// Empty entries are encoded with a `!0` value for `log2len` which will always be out of range.
|
||||
/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out f
|
||||
/// bounds.
|
||||
pub struct Level1Entry<OffT: Into<u32> + Copy> {
|
||||
pub ty: Type,
|
||||
pub log2len: u8,
|
||||
pub legalize: LegalizeCode,
|
||||
pub offset: OffT,
|
||||
}
|
||||
|
||||
impl<OffT: Into<u32> + Copy> Level1Entry<OffT> {
|
||||
/// Get the level 2 table range indicated by this entry.
|
||||
fn range(&self) -> Range<usize> {
|
||||
let b = self.offset.into() as usize;
|
||||
b..b + (1 << self.log2len)
|
||||
}
|
||||
}
|
||||
|
||||
impl<OffT: Into<u32> + Copy> Table<Type> for [Level1Entry<OffT>] {
    fn len(&self) -> usize {
        // Inherent slice `len`, not recursion.
        self.len()
    }

    fn key(&self, idx: usize) -> Option<Type> {
        let entry = &self[idx];
        // `!0` in `log2len` marks an empty (unoccupied) hash table slot.
        if entry.log2len == !0 {
            None
        } else {
            Some(entry.ty)
        }
    }
}
|
||||
|
||||
/// Level 2 hash table entry.
///
/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS`
/// table where the encoding recipes for the instruction are stored.
///
/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16`
/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8`
/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16
/// bits.
///
/// Empty entries are encoded with a `NotAnOpcode` `opcode` field.
pub struct Level2Entry<OffT: Into<u32> + Copy> {
    /// Opcode this entry is keyed by; `None` marks an empty slot.
    pub opcode: Option<Opcode>,
    /// Offset into `ENCLISTS` where this instruction's encoding list begins.
    pub offset: OffT,
}
impl<OffT: Into<u32> + Copy> Table<Opcode> for [Level2Entry<OffT>] {
    fn len(&self) -> usize {
        // Resolves to the inherent `<[T]>::len`, not this trait method, so no recursion.
        self.len()
    }

    fn key(&self, idx: usize) -> Option<Opcode> {
        // `None` marks an empty hash table slot.
        self[idx].opcode
    }
}
/// Two-level hash table lookup and iterator construction.
///
/// Given the controlling type variable and instruction opcode, find the corresponding encoding
/// list.
///
/// The lookup first probes `level1_table` by controlling type, then the type's level 2 table by
/// opcode. Either probe may miss, in which case the returned iterator is empty but still carries
/// a valid legalization code.
///
/// Returns an iterator that produces legal encodings for `inst`.
pub fn lookup_enclist<'a, OffT1, OffT2>(
    ctrl_typevar: Type,
    inst: &'a InstructionData,
    func: &'a Function,
    level1_table: &'static [Level1Entry<OffT1>],
    level2_table: &'static [Level2Entry<OffT2>],
    enclist: &'static [EncListEntry],
    legalize_actions: &'static [Legalize],
    recipe_preds: &'static [RecipePredicate],
    inst_preds: &'static [InstPredicate],
    isa_preds: PredicateView<'a>,
) -> Encodings<'a>
where
    OffT1: Into<u32> + Copy,
    OffT2: Into<u32> + Copy,
{
    let (offset, legalize) = match probe(level1_table, ctrl_typevar, ctrl_typevar.index()) {
        Err(l1idx) => {
            // No level 1 entry found for the type.
            // We have a sentinel entry with the default legalization code.
            (!0, level1_table[l1idx].legalize)
        }
        Ok(l1idx) => {
            // We have a valid level 1 entry for this type.
            let l1ent = &level1_table[l1idx];
            let offset = match level2_table.get(l1ent.range()) {
                Some(l2tab) => {
                    let opcode = inst.opcode();
                    match probe(l2tab, opcode, opcode as usize) {
                        Ok(l2idx) => l2tab[l2idx].offset.into() as usize,
                        // Opcode not present in the level 2 table: no encodings.
                        Err(_) => !0,
                    }
                }
                // The l1ent range is invalid. This means that we just have a customized
                // legalization code for this type. The level 2 table is empty.
                None => !0,
            };
            (offset, l1ent.legalize)
        }
    };

    // Now we have an offset into `enclist` that is `!0` when no encoding list could be found.
    // The default legalization code is always valid.
    Encodings::new(
        offset,
        legalize,
        inst,
        func,
        enclist,
        legalize_actions,
        recipe_preds,
        inst_preds,
        isa_preds,
    )
}
/// Encoding list entry.
///
/// Encoding lists are represented as sequences of u16 words.
pub type EncListEntry = u16;

/// Number of bits used to represent a predicate. c.f. `meta/gen_encoding.py`.
const PRED_BITS: u8 = 12;
/// Mask extracting the predicate number from a predicate code word.
const PRED_MASK: usize = (1 << PRED_BITS) - 1;
/// First code word representing a predicate check. c.f. `meta/gen_encoding.py`.
/// Entries below this value are either "recipe+bits" or "stop with legalize" words.
const PRED_START: usize = 0x1000;
/// An iterator over legal encodings for the instruction.
pub struct Encodings<'a> {
    // Current offset into `enclist`, or out of bounds (`!0`) after we've reached the end.
    offset: usize,
    // Legalization code to use if no encoding is found.
    legalize: LegalizeCode,
    // Instruction being encoded and its enclosing function (needed by instruction predicates).
    inst: &'a InstructionData,
    func: &'a Function,
    // Static ISA tables: encoding lists, legalization actions, and predicate tables.
    enclist: &'static [EncListEntry],
    legalize_actions: &'static [Legalize],
    recipe_preds: &'static [RecipePredicate],
    inst_preds: &'static [InstPredicate],
    // Dynamic view of the ISA predicate values for this compilation.
    isa_preds: PredicateView<'a>,
}
impl<'a> Encodings<'a> {
|
||||
/// Creates a new instance of `Encodings`.
|
||||
///
|
||||
/// This iterator provides search for encodings that applies to the given instruction. The
|
||||
/// encoding lists are laid out such that first call to `next` returns valid entry in the list
|
||||
/// or `None`.
|
||||
pub fn new(
|
||||
offset: usize,
|
||||
legalize: LegalizeCode,
|
||||
inst: &'a InstructionData,
|
||||
func: &'a Function,
|
||||
enclist: &'static [EncListEntry],
|
||||
legalize_actions: &'static [Legalize],
|
||||
recipe_preds: &'static [RecipePredicate],
|
||||
inst_preds: &'static [InstPredicate],
|
||||
isa_preds: PredicateView<'a>,
|
||||
) -> Self {
|
||||
Encodings {
|
||||
offset,
|
||||
inst,
|
||||
func,
|
||||
legalize,
|
||||
isa_preds,
|
||||
recipe_preds,
|
||||
inst_preds,
|
||||
enclist,
|
||||
legalize_actions,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the legalization action that caused the enumeration of encodings to stop.
|
||||
/// This can be the default legalization action for the type or a custom code for the
|
||||
/// instruction.
|
||||
///
|
||||
/// This method must only be called after the iterator returns `None`.
|
||||
pub fn legalize(&self) -> Legalize {
|
||||
debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()");
|
||||
self.legalize_actions[self.legalize as usize]
|
||||
}
|
||||
|
||||
/// Check if the `rpred` recipe predicate is satisfied.
|
||||
fn check_recipe(&self, rpred: RecipePredicate) -> bool {
|
||||
match rpred {
|
||||
Some(p) => p(self.isa_preds, self.inst),
|
||||
None => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check an instruction or isa predicate.
|
||||
fn check_pred(&self, pred: usize) -> bool {
|
||||
if let Some(&p) = self.inst_preds.get(pred) {
|
||||
p(self.func, self.inst)
|
||||
} else {
|
||||
let pred = pred - self.inst_preds.len();
|
||||
self.isa_preds.test(pred)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Encodings<'a> {
    type Item = Encoding;

    fn next(&mut self) -> Option<Encoding> {
        // Walk the encoding list until we produce an encoding, hit a terminator, or run off the
        // end. `self.offset` is set to `!0` to mark the iterator as finished.
        while let Some(entryref) = self.enclist.get(self.offset) {
            let entry = *entryref as usize;

            // Check for "recipe+bits".
            let recipe = entry >> 1;
            if let Some(&rpred) = self.recipe_preds.get(recipe) {
                // The word after the recipe word holds the encoding bits.
                let bits = self.offset + 1;
                // The low bit of the entry distinguishes "more entries follow" (0) from
                // "this is the last entry" (1).
                if entry & 1 == 0 {
                    self.offset += 2; // Next entry.
                } else {
                    self.offset = !0; // Stop.
                }
                if self.check_recipe(rpred) {
                    return Some(Encoding::new(recipe as u16, self.enclist[bits]));
                }
                continue;
            }

            // Check for "stop with legalize".
            // These words are numbered directly after the 2-word recipe entries.
            if entry < PRED_START {
                self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode;
                self.offset = !0; // Stop.
                return None;
            }

            // Finally, this must be a predicate entry.
            // The high bits hold a skip count; the low `PRED_BITS` hold the predicate number.
            let pred_entry = entry - PRED_START;
            let skip = pred_entry >> PRED_BITS;
            let pred = pred_entry & PRED_MASK;

            if self.check_pred(pred) {
                self.offset += 1;
            } else if skip == 0 {
                self.offset = !0; // Stop.
                return None;
            } else {
                self.offset += 1 + skip;
            }
        }
        None
    }
}
137
lib/codegen/src/isa/encoding.rs
Normal file
137
lib/codegen/src/isa/encoding.rs
Normal file
@@ -0,0 +1,137 @@
|
||||
//! The `Encoding` struct.
|
||||
|
||||
use binemit::CodeOffset;
|
||||
use isa::constraints::{BranchRange, RecipeConstraints};
|
||||
use std::fmt;
|
||||
|
||||
/// Bits needed to encode an instruction as binary machine code.
///
/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
/// encoding *bits*. The recipe determines the native instruction format and the mapping of
/// operands to encoded bits. The encoding bits provide additional information to the recipe,
/// typically parts of the opcode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Encoding {
    recipe: u16,
    bits: u16,
}

impl Encoding {
    /// Create a new `Encoding` containing `(recipe, bits)`.
    pub fn new(recipe: u16, bits: u16) -> Encoding {
        Encoding { recipe, bits }
    }

    /// Get the recipe number in this encoding.
    pub fn recipe(self) -> usize {
        usize::from(self.recipe)
    }

    /// Get the recipe-specific encoding bits.
    pub fn bits(self) -> u16 {
        self.bits
    }

    /// Is this a legal encoding, or the default placeholder?
    pub fn is_legal(self) -> bool {
        self != Self::default()
    }
}

/// The default encoding is the illegal one.
impl Default for Encoding {
    fn default() -> Self {
        Encoding {
            recipe: 0xffff,
            bits: 0xffff,
        }
    }
}

/// ISA-independent display of an encoding.
impl fmt::Display for Encoding {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if !self.is_legal() {
            return write!(f, "-");
        }
        write!(f, "{}#{:02x}", self.recipe, self.bits)
    }
}
/// Temporary object that holds enough context to properly display an encoding.
|
||||
/// This is meant to be created by `EncInfo::display()`.
|
||||
pub struct DisplayEncoding {
|
||||
pub encoding: Encoding,
|
||||
pub recipe_names: &'static [&'static str],
|
||||
}
|
||||
|
||||
impl fmt::Display for DisplayEncoding {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.encoding.is_legal() {
|
||||
write!(
|
||||
f,
|
||||
"{}#{:02x}",
|
||||
self.recipe_names[self.encoding.recipe()],
|
||||
self.encoding.bits
|
||||
)
|
||||
} else {
|
||||
write!(f, "-")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Code size information for an encoding recipe.
///
/// All encoding recipes correspond to an exact instruction size.
pub struct RecipeSizing {
    /// Size in bytes of instructions encoded with this recipe.
    pub bytes: u8,

    /// Allowed branch range in this recipe, if any.
    ///
    /// All encoding recipes for branches have exact branch range information.
    pub branch_range: Option<BranchRange>,
}
/// Information about all the encodings in this ISA.
///
/// All three tables are indexed by recipe number, so they must have the same length.
#[derive(Clone)]
pub struct EncInfo {
    /// Constraints on value operands per recipe.
    pub constraints: &'static [RecipeConstraints],

    /// Code size information per recipe.
    pub sizing: &'static [RecipeSizing],

    /// Names of encoding recipes.
    pub names: &'static [&'static str],
}
impl EncInfo {
|
||||
/// Get the value operand constraints for `enc` if it is a legal encoding.
|
||||
pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> {
|
||||
self.constraints.get(enc.recipe())
|
||||
}
|
||||
|
||||
/// Create an object that can display an ISA-dependent encoding properly.
|
||||
pub fn display(&self, enc: Encoding) -> DisplayEncoding {
|
||||
DisplayEncoding {
|
||||
encoding: enc,
|
||||
recipe_names: self.names,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the exact size in bytes of instructions encoded with `enc`.
|
||||
///
|
||||
/// Returns 0 for illegal encodings.
|
||||
pub fn bytes(&self, enc: Encoding) -> CodeOffset {
|
||||
self.sizing
|
||||
.get(enc.recipe())
|
||||
.map(|s| CodeOffset::from(s.bytes))
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Get the branch range that is supported by `enc`, if any.
|
||||
///
|
||||
/// This will never return `None` for a legal branch encoding.
|
||||
pub fn branch_range(&self, enc: Encoding) -> Option<BranchRange> {
|
||||
self.sizing.get(enc.recipe()).and_then(|s| s.branch_range)
|
||||
}
|
||||
}
|
||||
282
lib/codegen/src/isa/mod.rs
Normal file
282
lib/codegen/src/isa/mod.rs
Normal file
@@ -0,0 +1,282 @@
|
||||
//! Instruction Set Architectures.
|
||||
//!
|
||||
//! The `isa` module provides a `TargetIsa` trait which provides the behavior specialization needed
|
||||
//! by the ISA-independent code generator. The sub-modules of this module provide definitions for
|
||||
//! the instruction sets that Cretonne can target. Each sub-module has its own implementation of
|
||||
//! `TargetIsa`.
|
||||
//!
|
||||
//! # Constructing a `TargetIsa` instance
|
||||
//!
|
||||
//! The target ISA is built from the following information:
|
||||
//!
|
||||
//! - The name of the target ISA as a string. Cretonne is a cross-compiler, so the ISA to target
|
||||
//! can be selected dynamically. Individual ISAs can be left out when Cretonne is compiled, so a
|
||||
//! string is used to identify the proper sub-module.
|
||||
//! - Values for settings that apply to all ISAs. This is represented by a `settings::Flags`
|
||||
//! instance.
|
||||
//! - Values for ISA-specific settings.
|
||||
//!
|
||||
//! The `isa::lookup()` function is the main entry point which returns an `isa::Builder`
|
||||
//! appropriate for the requested ISA:
|
||||
//!
|
||||
//! ```
|
||||
//! use cretonne_codegen::settings::{self, Configurable};
|
||||
//! use cretonne_codegen::isa;
|
||||
//!
|
||||
//! let shared_builder = settings::builder();
|
||||
//! let shared_flags = settings::Flags::new(&shared_builder);
|
||||
//!
|
||||
//! match isa::lookup("riscv") {
|
||||
//! Err(_) => {
|
||||
//! // The RISC-V target ISA is not available.
|
||||
//! }
|
||||
//! Ok(mut isa_builder) => {
|
||||
//! isa_builder.set("supports_m", "on");
|
||||
//! let isa = isa_builder.finish(shared_flags);
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! The configured target ISA trait object is a `Box<TargetIsa>` which can be used for multiple
|
||||
//! concurrent function compilations.
|
||||
|
||||
pub use isa::constraints::{BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints};
|
||||
pub use isa::encoding::{EncInfo, Encoding};
|
||||
pub use isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
|
||||
pub use isa::stack::{StackBase, StackBaseMask, StackRef};
|
||||
|
||||
use binemit;
|
||||
use flowgraph;
|
||||
use ir;
|
||||
use isa::enc_tables::Encodings;
|
||||
use regalloc;
|
||||
use result;
|
||||
use settings;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
use timing;
|
||||
|
||||
#[cfg(build_riscv)]
|
||||
mod riscv;
|
||||
|
||||
#[cfg(build_x86)]
|
||||
mod x86;
|
||||
|
||||
#[cfg(build_arm32)]
|
||||
mod arm32;
|
||||
|
||||
#[cfg(build_arm64)]
|
||||
mod arm64;
|
||||
|
||||
mod constraints;
|
||||
mod enc_tables;
|
||||
mod encoding;
|
||||
pub mod registers;
|
||||
mod stack;
|
||||
|
||||
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::Unsupported)` if not enabled.
///
/// `$module` is the ISA sub-module providing `isa_builder()`; `$name` is the cfg flag that
/// controls whether that ISA was compiled in (e.g. `build_riscv`).
macro_rules! isa_builder {
    ($module:ident, $name:ident) => {{
        // When the ISA is enabled, forward to its builder.
        #[cfg($name)]
        fn $name() -> Result<Builder, LookupError> {
            Ok($module::isa_builder())
        }
        // When the ISA is compiled out, report it as known-but-unsupported.
        #[cfg(not($name))]
        fn $name() -> Result<Builder, LookupError> {
            Err(LookupError::Unsupported)
        }
        $name()
    }};
}
/// Look for a supported ISA with the given `name`.
/// Return a builder that can create a corresponding `TargetIsa`.
///
/// Returns `Err(LookupError::Unknown)` for unrecognized names and
/// `Err(LookupError::Unsupported)` for ISAs that were not compiled in.
pub fn lookup(name: &str) -> Result<Builder, LookupError> {
    match name {
        "riscv" => isa_builder!(riscv, build_riscv),
        "x86" => isa_builder!(x86, build_x86),
        "arm32" => isa_builder!(arm32, build_arm32),
        "arm64" => isa_builder!(arm64, build_arm64),
        _ => Err(LookupError::Unknown),
    }
}
/// Describes reason for target lookup failure
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum LookupError {
    /// Unknown Target
    Unknown,

    /// Target known but not built and thus not supported
    Unsupported,
}
/// Builder for a `TargetIsa`.
/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
pub struct Builder {
    // ISA-specific settings being configured before `finish`.
    setup: settings::Builder,
    // ISA-provided function that combines shared flags and ISA settings into a `TargetIsa`.
    constructor: fn(settings::Flags, &settings::Builder) -> Box<TargetIsa>,
}
impl Builder {
    /// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
    /// fully configured `TargetIsa` trait object.
    pub fn finish(self, shared_flags: settings::Flags) -> Box<TargetIsa> {
        (self.constructor)(shared_flags, &self.setup)
    }
}
/// Forward setting configuration to the ISA-specific settings builder.
impl settings::Configurable for Builder {
    fn set(&mut self, name: &str, value: &str) -> settings::Result<()> {
        self.setup.set(name, value)
    }

    fn enable(&mut self, name: &str) -> settings::Result<()> {
        self.setup.enable(name)
    }
}
/// After determining that an instruction doesn't have an encoding, how should we proceed to
|
||||
/// legalize it?
|
||||
///
|
||||
/// The `Encodings` iterator returns a legalization function to call.
|
||||
pub type Legalize = fn(ir::Inst,
|
||||
&mut ir::Function,
|
||||
&mut flowgraph::ControlFlowGraph,
|
||||
&TargetIsa)
|
||||
-> bool;
|
||||
|
||||
/// Methods that are specialized to a target ISA. Implies a Display trait that shows the
/// shared flags, as well as any isa-specific flags.
pub trait TargetIsa: fmt::Display {
    /// Get the name of this ISA.
    fn name(&self) -> &'static str;

    /// Get the ISA-independent flags that were used to make this trait object.
    fn flags(&self) -> &settings::Flags;

    /// Does the CPU implement scalar comparisons using a CPU flags register?
    fn uses_cpu_flags(&self) -> bool {
        false
    }

    /// Get a data structure describing the registers in this ISA.
    fn register_info(&self) -> RegInfo;

    /// Returns an iterator over legal encodings for the instruction.
    fn legal_encodings<'a>(
        &'a self,
        func: &'a ir::Function,
        inst: &'a ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Encodings<'a>;

    /// Encode an instruction after determining it is legal.
    ///
    /// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object.
    /// Otherwise, return `Legalize` action.
    ///
    /// This is also the main entry point for determining if an instruction is legal.
    fn encode(
        &self,
        func: &ir::Function,
        inst: &ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Result<Encoding, Legalize> {
        let mut iter = self.legal_encodings(func, inst, ctrl_typevar);
        iter.next().ok_or_else(|| iter.legalize())
    }

    /// Get a data structure describing the instruction encodings in this ISA.
    fn encoding_info(&self) -> EncInfo;

    /// Legalize a function signature.
    ///
    /// This is used to legalize both the signature of the function being compiled and any called
    /// functions. The signature should be modified by adding `ArgumentLoc` annotations to all
    /// arguments and return values.
    ///
    /// Arguments with types that are not supported by the ABI can be expanded into multiple
    /// arguments:
    ///
    /// - Integer types that are too large to fit in a register can be broken into multiple
    ///   arguments of a smaller integer type.
    /// - Floating point types can be bit-cast to an integer type of the same size, and possibly
    ///   broken into smaller integer types.
    /// - Vector types can be bit-cast and broken down into smaller vectors or scalars.
    ///
    /// The legalizer will adapt argument and return values as necessary at all ABI boundaries.
    ///
    /// When this function is called to legalize the signature of the function currently being
    /// compiled, `current` is true. The legalized signature can then also contain special purpose
    /// arguments and return values such as:
    ///
    /// - A `link` argument representing the link registers on RISC architectures that don't push
    ///   the return address on the stack.
    /// - A `link` return value which will receive the value that was passed to the `link`
    ///   argument.
    /// - An `sret` argument can be added if one wasn't present already. This is necessary if the
    ///   signature returns more values than registers are available for returning values.
    /// - An `sret` return value can be added if the ABI requires a function to return its `sret`
    ///   argument in a register.
    ///
    /// Arguments and return values for the caller's frame pointer and other callee-saved registers
    /// should not be added by this function. These arguments are not added until after register
    /// allocation.
    fn legalize_signature(&self, sig: &mut ir::Signature, current: bool);

    /// Get the register class that should be used to represent an ABI argument or return value of
    /// type `ty`. This should be the top-level register class that contains the argument
    /// registers.
    ///
    /// This function can assume that it will only be asked to provide register classes for types
    /// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries.
    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass;

    /// Get the set of allocatable registers that can be used when compiling `func`.
    ///
    /// This set excludes reserved registers like the stack pointer and other special-purpose
    /// registers.
    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;

    /// Compute the stack layout and insert prologue and epilogue code into `func`.
    ///
    /// Return an error if the stack frame is too large.
    fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult {
        let _tt = timing::prologue_epilogue();
        // This default implementation is unlikely to be good enough.
        use ir::stackslot::{StackOffset, StackSize};
        use stack_layout::layout_stack;

        let word_size = if self.flags().is_64bit() { 8 } else { 4 };

        // Account for the SpiderMonkey standard prologue pushes.
        if func.signature.call_conv == ir::CallConv::SpiderWASM {
            let bytes = StackSize::from(self.flags().spiderwasm_prologue_words()) * word_size;
            let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
            ss.offset = Some(-(bytes as StackOffset));
            func.stack_slots.push(ss);
        }

        layout_stack(&mut func.stack_slots, word_size)?;
        Ok(())
    }

    /// Emit binary machine code for a single instruction into the `sink` trait object.
    ///
    /// Note that this will call `put*` methods on the trait object via its vtable which is not the
    /// fastest way of emitting code.
    fn emit_inst(
        &self,
        func: &ir::Function,
        inst: ir::Inst,
        divert: &mut regalloc::RegDiversions,
        sink: &mut binemit::CodeSink,
    );

    /// Emit a whole function into memory.
    ///
    /// This is more performant than calling `emit_inst` for each instruction.
    fn emit_function(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink);
}
322
lib/codegen/src/isa/registers.rs
Normal file
322
lib/codegen/src/isa/registers.rs
Normal file
@@ -0,0 +1,322 @@
|
||||
//! Data structures describing the registers in an ISA.
|
||||
|
||||
use entity::EntityRef;
|
||||
use std::fmt;
|
||||
|
||||
/// Register units are the smallest units of register allocation.
///
/// Normally there is a 1-1 correspondence between registers and register units, but when an ISA
/// has aliasing registers, the aliasing can be modeled with registers that cover multiple
/// register units.
///
/// The register allocator will enforce that each register unit only gets used for one thing.
pub type RegUnit = u16;

/// A bit mask indexed by register units.
///
/// The size of this type is determined by the target ISA that has the most register units defined.
/// Currently that is arm32 which has 64+16 units.
///
/// This type should be coordinated with meta/cdsl/registers.py.
pub type RegUnitMask = [u32; 3];

/// A bit mask indexed by register classes.
///
/// The size of this type is determined by the ISA with the most register classes.
///
/// This type should be coordinated with meta/cdsl/isa.py.
pub type RegClassMask = u32;

/// Guaranteed maximum number of top-level register classes with pressure tracking in any ISA.
///
/// This can be increased, but should be coordinated with meta/cdsl/isa.py.
pub const MAX_TRACKED_TOPRCS: usize = 4;
/// The register units in a target ISA are divided into disjoint register banks. Each bank covers a
/// contiguous range of register units.
///
/// The `RegBank` struct provides a static description of a register bank.
pub struct RegBank {
    /// The name of this register bank as defined in the ISA's `registers.py` file.
    pub name: &'static str,

    /// The first register unit in this bank.
    pub first_unit: RegUnit,

    /// The total number of register units in this bank.
    pub units: RegUnit,

    /// Array of specially named register units. This array can be shorter than the number of units
    /// in the bank.
    pub names: &'static [&'static str],

    /// Name prefix to use for those register units in the bank not covered by the `names` array.
    /// The remaining register units will be named this prefix followed by their decimal offset in
    /// the bank. So with a prefix `r`, registers will be named `r8`, `r9`, ...
    pub prefix: &'static str,

    /// Index of the first top-level register class in this bank.
    pub first_toprc: usize,

    /// Number of top-level register classes in this bank.
    ///
    /// The top-level register classes in a bank are guaranteed to be numbered sequentially from
    /// `first_toprc`, and all top-level register classes across banks come before any sub-classes.
    pub num_toprcs: usize,

    /// Is register pressure tracking enabled for this bank?
    pub pressure_tracking: bool,
}
impl RegBank {
|
||||
/// Does this bank contain `regunit`?
|
||||
fn contains(&self, regunit: RegUnit) -> bool {
|
||||
regunit >= self.first_unit && regunit - self.first_unit < self.units
|
||||
}
|
||||
|
||||
/// Try to parse a regunit name. The name is not expected to begin with `%`.
|
||||
fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
|
||||
match self.names.iter().position(|&x| x == name) {
|
||||
Some(offset) => {
|
||||
// This is one of the special-cased names.
|
||||
Some(offset as RegUnit)
|
||||
}
|
||||
None => {
|
||||
// Try a regular prefixed name.
|
||||
if name.starts_with(self.prefix) {
|
||||
name[self.prefix.len()..].parse().ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}.and_then(|offset| if offset < self.units {
|
||||
Some(offset + self.first_unit)
|
||||
} else {
|
||||
None
|
||||
})
|
||||
}
|
||||
|
||||
/// Write `regunit` to `w`, assuming that it belongs to this bank.
|
||||
/// All regunits are written with a `%` prefix.
|
||||
fn write_regunit(&self, f: &mut fmt::Formatter, regunit: RegUnit) -> fmt::Result {
|
||||
let offset = regunit - self.first_unit;
|
||||
assert!(offset < self.units);
|
||||
if (offset as usize) < self.names.len() {
|
||||
write!(f, "%{}", self.names[offset as usize])
|
||||
} else {
|
||||
write!(f, "%{}{}", self.prefix, offset)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A register class reference.
///
/// All register classes are statically defined in tables generated from the meta descriptions,
/// so a class is referenced simply as a `'static` borrow of its data.
pub type RegClass = &'static RegClassData;
/// Data about a register class.
///
/// A register class represents a subset of the registers in a bank. It describes the set of
/// permitted registers for a register operand in a given encoding of an instruction.
///
/// A register class can be a subset of another register class. The top-level register classes are
/// disjoint.
pub struct RegClassData {
    /// The name of the register class.
    pub name: &'static str,

    /// The index of this class in the ISA's RegInfo description.
    pub index: u8,

    /// How many register units to allocate per register.
    pub width: u8,

    /// Index of the register bank this class belongs to.
    pub bank: u8,

    /// Index of the top-level register class that contains this one.
    pub toprc: u8,

    /// The first register unit in this class.
    pub first: RegUnit,

    /// Bit-mask of sub-classes of this register class, including itself.
    ///
    /// Bits correspond to RC indexes.
    pub subclasses: RegClassMask,

    /// Mask of register units in the class. If `width > 1`, the mask only has a bit set for the
    /// first register unit in each allocatable register.
    pub mask: RegUnitMask,

    /// The global `RegInfo` instance that contains this register class.
    pub info: &'static RegInfo,
}
impl RegClassData {
|
||||
/// Get the register class index corresponding to the intersection of `self` and `other`.
|
||||
///
|
||||
/// This register class is guaranteed to exist if the register classes overlap. If the register
|
||||
/// classes don't overlap, returns `None`.
|
||||
pub fn intersect_index(&self, other: RegClass) -> Option<RegClassIndex> {
|
||||
// Compute the set of common subclasses.
|
||||
let mask = self.subclasses & other.subclasses;
|
||||
|
||||
if mask == 0 {
|
||||
// No overlap.
|
||||
None
|
||||
} else {
|
||||
// Register class indexes are topologically ordered, so the largest common subclass has
|
||||
// the smallest index.
|
||||
Some(RegClassIndex(mask.trailing_zeros() as u8))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the intersection of `self` and `other`.
|
||||
pub fn intersect(&self, other: RegClass) -> Option<RegClass> {
|
||||
self.intersect_index(other).map(|rci| self.info.rc(rci))
|
||||
}
|
||||
|
||||
/// Returns true if `other` is a subclass of this register class.
|
||||
/// A register class is considered to be a subclass of itself.
|
||||
pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
|
||||
self.subclasses & (1 << other.into().0) != 0
|
||||
}
|
||||
|
||||
/// Get the top-level register class containing this class.
|
||||
pub fn toprc(&self) -> RegClass {
|
||||
self.info.rc(RegClassIndex(self.toprc))
|
||||
}
|
||||
|
||||
/// Get a specific register unit in this class.
|
||||
pub fn unit(&self, offset: usize) -> RegUnit {
|
||||
let uoffset = offset * usize::from(self.width);
|
||||
self.first + uoffset as RegUnit
|
||||
}
|
||||
|
||||
/// Does this register class contain `regunit`?
|
||||
pub fn contains(&self, regunit: RegUnit) -> bool {
|
||||
self.mask[(regunit / 32) as usize] & (1u32 << (regunit % 32)) != 0
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for RegClassData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(self.name)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for RegClassData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(self.name)
|
||||
}
|
||||
}
|
||||
|
||||
/// Within an ISA, register classes are uniquely identified by their index.
impl PartialEq for RegClassData {
    fn eq(&self, other: &Self) -> bool {
        // Comparing the index is sufficient; no two classes share one.
        self.index == other.index
    }
}
|
||||
|
||||
/// A small reference to a register class.
///
/// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method
/// can be used to get the real register class reference back.
// The wrapped `u8` is the class's position in `RegInfo::classes`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RegClassIndex(u8);
|
||||
|
||||
impl EntityRef for RegClassIndex {
    // Wrap a raw index. `idx` is assumed to fit in a `u8`; ISAs define only a
    // handful of register classes.
    fn new(idx: usize) -> Self {
        RegClassIndex(idx as u8)
    }

    // Recover the raw index for table lookups.
    fn index(self) -> usize {
        usize::from(self.0)
    }
}
|
||||
|
||||
impl From<RegClass> for RegClassIndex {
    // A full register class reference carries its own index, so the
    // conversion is just a field read.
    fn from(rc: RegClass) -> Self {
        RegClassIndex(rc.index)
    }
}
|
||||
|
||||
impl fmt::Display for RegClassIndex {
    // Displays as e.g. `rci3`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "rci{}", self.0)
    }
}
|
||||
|
||||
/// Test if two registers overlap.
///
/// A register is identified as a `(RegClass, RegUnit)` pair. The register class is needed to
/// determine the width (in regunits) of the register.
pub fn regs_overlap(rc1: RegClass, reg1: RegUnit, rc2: RegClass, reg2: RegUnit) -> bool {
    // Each register covers units `[reg, reg + width)`. Two half-open ranges
    // overlap unless one ends at or before the other begins.
    let end1 = reg1 + RegUnit::from(rc1.width);
    let end2 = reg2 + RegUnit::from(rc2.width);
    !(end1 <= reg2 || end2 <= reg1)
}
|
||||
|
||||
/// Information about the registers in an ISA.
///
/// The `RegInfo` data structure collects all relevant static information about the registers in
/// an ISA.
#[derive(Clone)]
pub struct RegInfo {
    /// All register banks, ordered by their `first_unit`. The register banks are disjoint, but
    /// there may be holes of unused register unit numbers between banks due to alignment.
    pub banks: &'static [RegBank],

    /// All register classes ordered topologically so a sub-class always follows its parent.
    pub classes: &'static [RegClass],
}
|
||||
|
||||
impl RegInfo {
|
||||
/// Get the register bank holding `regunit`.
|
||||
pub fn bank_containing_regunit(&self, regunit: RegUnit) -> Option<&RegBank> {
|
||||
// We could do a binary search, but most ISAs have only two register banks...
|
||||
self.banks.iter().find(|b| b.contains(regunit))
|
||||
}
|
||||
|
||||
/// Try to parse a regunit name. The name is not expected to begin with `%`.
|
||||
pub fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
|
||||
self.banks
|
||||
.iter()
|
||||
.filter_map(|b| b.parse_regunit(name))
|
||||
.next()
|
||||
}
|
||||
|
||||
/// Make a temporary object that can display a register unit.
|
||||
pub fn display_regunit(&self, regunit: RegUnit) -> DisplayRegUnit {
|
||||
DisplayRegUnit {
|
||||
regunit,
|
||||
reginfo: self,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the register class corresponding to `idx`.
|
||||
pub fn rc(&self, idx: RegClassIndex) -> RegClass {
|
||||
self.classes[idx.index()]
|
||||
}
|
||||
|
||||
/// Get the top-level register class containing the `idx` class.
|
||||
pub fn toprc(&self, idx: RegClassIndex) -> RegClass {
|
||||
self.classes[self.rc(idx).toprc as usize]
|
||||
}
|
||||
}
|
||||
|
||||
/// Temporary object that holds enough information to print a register unit.
pub struct DisplayRegUnit<'a> {
    // The register unit to display.
    regunit: RegUnit,
    // Register description used to resolve the unit's bank and name.
    reginfo: &'a RegInfo,
}
|
||||
|
||||
impl<'a> fmt::Display for DisplayRegUnit<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.reginfo.bank_containing_regunit(self.regunit) {
|
||||
Some(b) => b.write_regunit(f, self.regunit),
|
||||
None => write!(f, "%INVALID{}", self.regunit),
|
||||
}
|
||||
}
|
||||
}
|
||||
140
lib/codegen/src/isa/riscv/abi.rs
Normal file
140
lib/codegen/src/isa/riscv/abi.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
//! RISC-V ABI implementation.
|
||||
//!
|
||||
//! This module implements the RISC-V calling convention through the primary `legalize_signature()`
|
||||
//! entry point.
|
||||
//!
|
||||
//! This doesn't support the soft-float ABI at the moment.
|
||||
|
||||
use super::registers::{FPR, GPR};
|
||||
use super::settings;
|
||||
use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
|
||||
use isa::RegClass;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
use std::i32;
|
||||
|
||||
// State for assigning RISC-V argument locations.
struct Args {
    // Size of a pointer register in bits.
    pointer_bits: u16,
    // Size of a pointer register in bytes.
    pointer_bytes: u32,
    // Native pointer-sized integer type.
    pointer_type: Type,
    // Number of argument registers assigned so far.
    regs: u32,
    // Total number of argument registers available (fewer with RV32E).
    reg_limit: u32,
    // Current stack offset for arguments passed on the stack.
    offset: u32,
}
|
||||
|
||||
impl Args {
|
||||
fn new(bits: u16, enable_e: bool) -> Args {
|
||||
Args {
|
||||
pointer_bits: bits,
|
||||
pointer_bytes: u32::from(bits) / 8,
|
||||
pointer_type: Type::int(bits).unwrap(),
|
||||
regs: 0,
|
||||
reg_limit: if enable_e { 6 } else { 8 },
|
||||
offset: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgAssigner for Args {
|
||||
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
|
||||
fn align(value: u32, to: u32) -> u32 {
|
||||
(value + to - 1) & !(to - 1)
|
||||
}
|
||||
|
||||
let ty = arg.value_type;
|
||||
|
||||
// Check for a legal type.
|
||||
// RISC-V doesn't have SIMD at all, so break all vectors down.
|
||||
if ty.is_vector() {
|
||||
return ValueConversion::VectorSplit.into();
|
||||
}
|
||||
|
||||
// Large integers and booleans are broken down to fit in a register.
|
||||
if !ty.is_float() && ty.bits() > self.pointer_bits {
|
||||
// Align registers and stack to a multiple of two pointers.
|
||||
self.regs = align(self.regs, 2);
|
||||
self.offset = align(self.offset, 2 * self.pointer_bytes);
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
|
||||
// Small integers are extended to the size of a pointer register.
|
||||
if ty.is_int() && ty.bits() < self.pointer_bits {
|
||||
match arg.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
|
||||
ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
|
||||
}
|
||||
}
|
||||
|
||||
if self.regs < self.reg_limit {
|
||||
// Assign to a register.
|
||||
let reg = if ty.is_float() {
|
||||
FPR.unit(10 + self.regs as usize)
|
||||
} else {
|
||||
GPR.unit(10 + self.regs as usize)
|
||||
};
|
||||
self.regs += 1;
|
||||
ArgumentLoc::Reg(reg).into()
|
||||
} else {
|
||||
// Assign a stack location.
|
||||
let loc = ArgumentLoc::Stack(self.offset as i32);
|
||||
self.offset += self.pointer_bytes;
|
||||
debug_assert!(self.offset <= i32::MAX as u32);
|
||||
loc.into()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalize `sig` for RISC-V.
|
||||
pub fn legalize_signature(
|
||||
sig: &mut ir::Signature,
|
||||
flags: &shared_settings::Flags,
|
||||
isa_flags: &settings::Flags,
|
||||
current: bool,
|
||||
) {
|
||||
let bits = if flags.is_64bit() { 64 } else { 32 };
|
||||
|
||||
let mut args = Args::new(bits, isa_flags.enable_e());
|
||||
legalize_args(&mut sig.params, &mut args);
|
||||
|
||||
let mut rets = Args::new(bits, isa_flags.enable_e());
|
||||
legalize_args(&mut sig.returns, &mut rets);
|
||||
|
||||
if current {
|
||||
let ptr = Type::int(bits).unwrap();
|
||||
|
||||
// Add the link register as an argument and return value.
|
||||
//
|
||||
// The `jalr` instruction implementing a return can technically accept the return address
|
||||
// in any register, but a micro-architecture with a return address predictor will only
|
||||
// recognize it as a return if the address is in `x1`.
|
||||
let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1));
|
||||
sig.params.push(link);
|
||||
sig.returns.push(link);
|
||||
}
|
||||
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: Type) -> RegClass {
|
||||
if ty.is_float() { FPR } else { GPR }
|
||||
}
|
||||
|
||||
pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet {
|
||||
let mut regs = RegisterSet::new();
|
||||
regs.take(GPR, GPR.unit(0)); // Hard-wired 0.
|
||||
// %x1 is the link register which is available for allocation.
|
||||
regs.take(GPR, GPR.unit(2)); // Stack pointer.
|
||||
regs.take(GPR, GPR.unit(3)); // Global pointer.
|
||||
regs.take(GPR, GPR.unit(4)); // Thread pointer.
|
||||
// TODO: %x8 is the frame pointer. Reserve it?
|
||||
|
||||
// Remove %x16 and up for RV32E.
|
||||
if isa_flags.enable_e() {
|
||||
for u in 16..32 {
|
||||
regs.take(GPR, GPR.unit(u));
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
182
lib/codegen/src/isa/riscv/binemit.rs
Normal file
182
lib/codegen/src/isa/riscv/binemit.rs
Normal file
@@ -0,0 +1,182 @@
|
||||
//! Emitting binary RISC-V machine code.
|
||||
|
||||
use binemit::{bad_encoding, CodeSink, Reloc};
|
||||
use ir::{Function, Inst, InstructionData};
|
||||
use isa::{RegUnit, StackBaseMask, StackRef};
|
||||
use predicates::is_signed_int;
|
||||
use regalloc::RegDiversions;
|
||||
use std::u32;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-riscv.rs"));
|
||||
|
||||
/// R-type instructions.
|
||||
///
|
||||
/// 31 24 19 14 11 6
|
||||
/// funct7 rs2 rs1 funct3 rd opcode
|
||||
/// 25 20 15 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
|
||||
fn put_r<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, rs2: RegUnit, rd: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let funct3 = (bits >> 5) & 0x7;
|
||||
let funct7 = (bits >> 8) & 0x7f;
|
||||
let rs1 = u32::from(rs1) & 0x1f;
|
||||
let rs2 = u32::from(rs2) & 0x1f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
i |= funct3 << 12;
|
||||
i |= rs1 << 15;
|
||||
i |= rs2 << 20;
|
||||
i |= funct7 << 25;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// R-type instructions with a shift amount instead of rs2.
|
||||
///
|
||||
/// 31 25 19 14 11 6
|
||||
/// funct7 shamt rs1 funct3 rd opcode
|
||||
/// 25 20 15 12 7 0
|
||||
///
|
||||
/// Both funct7 and shamt contribute to bit 25. In RV64, shamt uses it for shifts > 31.
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
|
||||
fn put_rshamt<CS: CodeSink + ?Sized>(
|
||||
bits: u16,
|
||||
rs1: RegUnit,
|
||||
shamt: i64,
|
||||
rd: RegUnit,
|
||||
sink: &mut CS,
|
||||
) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let funct3 = (bits >> 5) & 0x7;
|
||||
let funct7 = (bits >> 8) & 0x7f;
|
||||
let rs1 = u32::from(rs1) & 0x1f;
|
||||
let shamt = shamt as u32 & 0x3f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
i |= funct3 << 12;
|
||||
i |= rs1 << 15;
|
||||
i |= shamt << 20;
|
||||
i |= funct7 << 25;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// I-type instructions.
|
||||
///
|
||||
/// 31 19 14 11 6
|
||||
/// imm rs1 funct3 rd opcode
|
||||
/// 20 15 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
|
||||
fn put_i<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let funct3 = (bits >> 5) & 0x7;
|
||||
let rs1 = u32::from(rs1) & 0x1f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
i |= funct3 << 12;
|
||||
i |= rs1 << 15;
|
||||
i |= (imm << 20) as u32;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// U-type instructions.
|
||||
///
|
||||
/// 31 11 6
|
||||
/// imm rd opcode
|
||||
/// 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
|
||||
fn put_u<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
i |= imm as u32 & 0xfffff000;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// SB-type branch instructions.
|
||||
///
|
||||
/// 31 24 19 14 11 6
|
||||
/// imm rs2 rs1 funct3 imm opcode
|
||||
/// 25 20 15 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
|
||||
fn put_sb<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let funct3 = (bits >> 5) & 0x7;
|
||||
let rs1 = u32::from(rs1) & 0x1f;
|
||||
let rs2 = u32::from(rs2) & 0x1f;
|
||||
|
||||
debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
|
||||
let imm = imm as u32;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= funct3 << 12;
|
||||
i |= rs1 << 15;
|
||||
i |= rs2 << 20;
|
||||
|
||||
// The displacement is completely hashed up.
|
||||
i |= ((imm >> 11) & 0x1) << 7;
|
||||
i |= ((imm >> 1) & 0xf) << 8;
|
||||
i |= ((imm >> 5) & 0x3f) << 25;
|
||||
i |= ((imm >> 12) & 0x1) << 31;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// UJ-type jump instructions.
|
||||
///
|
||||
/// 31 11 6
|
||||
/// imm rd opcode
|
||||
/// 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2]`
|
||||
fn put_uj<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
|
||||
let imm = imm as u32;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
|
||||
// The displacement is completely hashed up.
|
||||
i |= imm & 0xff000;
|
||||
i |= ((imm >> 11) & 0x1) << 20;
|
||||
i |= ((imm >> 1) & 0x3ff) << 21;
|
||||
i |= ((imm >> 20) & 0x1) << 31;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
18
lib/codegen/src/isa/riscv/enc_tables.rs
Normal file
18
lib/codegen/src/isa/riscv/enc_tables.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
//! Encoding tables for RISC-V.
|
||||
|
||||
use super::registers::*;
|
||||
use ir;
|
||||
use isa;
|
||||
use isa::constraints::*;
|
||||
use isa::enc_tables::*;
|
||||
use isa::encoding::RecipeSizing;
|
||||
use predicates;
|
||||
|
||||
// Include the generated encoding tables:
|
||||
// - `LEVEL1_RV32`
|
||||
// - `LEVEL1_RV64`
|
||||
// - `LEVEL2`
|
||||
// - `ENCLISTS`
|
||||
// - `INFO`
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-riscv.rs"));
|
||||
266
lib/codegen/src/isa/riscv/mod.rs
Normal file
266
lib/codegen/src/isa/riscv/mod.rs
Normal file
@@ -0,0 +1,266 @@
|
||||
//! RISC-V Instruction Set Architecture.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
use binemit::{emit_function, CodeSink, MemoryCodeSink};
|
||||
use ir;
|
||||
use isa::Builder as IsaBuilder;
|
||||
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
|
||||
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use regalloc;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
|
||||
// The RISC-V target ISA object handed out by `isa_builder()`.
#[allow(dead_code)]
struct Isa {
    // Flags shared by all target ISAs.
    shared_flags: shared_settings::Flags,
    // RISC-V specific flags.
    isa_flags: settings::Flags,
    // Level-1 encoding table for the selected RV32/RV64 mode.
    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
|
||||
|
||||
/// Get an ISA builder for creating RISC-V targets.
pub fn isa_builder() -> IsaBuilder {
    IsaBuilder {
        // Start from the default RISC-V settings; `isa_constructor` finishes
        // construction once the shared flags are known.
        setup: settings::builder(),
        constructor: isa_constructor,
    }
}
|
||||
|
||||
fn isa_constructor(
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: &shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
let level1 = if shared_flags.is_64bit() {
|
||||
&enc_tables::LEVEL1_RV64[..]
|
||||
} else {
|
||||
&enc_tables::LEVEL1_RV32[..]
|
||||
};
|
||||
Box::new(Isa {
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
    // ISA name as used by `isa::lookup()`.
    fn name(&self) -> &'static str {
        "riscv"
    }

    fn flags(&self) -> &shared_settings::Flags {
        &self.shared_flags
    }

    fn register_info(&self) -> RegInfo {
        registers::INFO.clone()
    }

    fn encoding_info(&self) -> EncInfo {
        enc_tables::INFO.clone()
    }

    // Enumerate legal encodings of `inst` by consulting the generated
    // encoding tables for the selected RV32/RV64 mode.
    fn legal_encodings<'a>(
        &'a self,
        func: &'a ir::Function,
        inst: &'a ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Encodings<'a> {
        lookup_enclist(
            ctrl_typevar,
            inst,
            func,
            self.cpumode,
            &enc_tables::LEVEL2[..],
            &enc_tables::ENCLISTS[..],
            &enc_tables::LEGALIZE_ACTIONS[..],
            &enc_tables::RECIPE_PREDICATES[..],
            &enc_tables::INST_PREDICATES[..],
            self.isa_flags.predicate_view(),
        )
    }

    // Delegate to the RISC-V ABI implementation.
    fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
        abi::legalize_signature(sig, &self.shared_flags, &self.isa_flags, current)
    }

    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
        abi::regclass_for_abi_type(ty)
    }

    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
        abi::allocatable_registers(func, &self.isa_flags)
    }

    // Delegate binary emission to the generated `binemit` module.
    fn emit_inst(
        &self,
        func: &ir::Function,
        inst: ir::Inst,
        divert: &mut regalloc::RegDiversions,
        sink: &mut CodeSink,
    ) {
        binemit::emit_inst(func, inst, divert, sink)
    }

    fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
        emit_function(func, binemit::emit_inst, sink)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use ir::{Function, InstructionData, Opcode};
    use ir::{immediates, types};
    use isa;
    use settings::{self, Configurable};
    use std::string::{String, ToString};

    // Render an encoding result compactly, or "no encoding" on failure.
    fn encstr(isa: &isa::TargetIsa, enc: Result<isa::Encoding, isa::Legalize>) -> String {
        match enc {
            Ok(e) => isa.encoding_info().display(e).to_string(),
            Err(_) => "no encoding".to_string(),
        }
    }

    #[test]
    fn test_64bitenc() {
        let mut shared_builder = settings::builder();
        shared_builder.enable("is_64bit").unwrap();
        let shared_flags = settings::Flags::new(&shared_builder);
        let isa = isa::lookup("riscv").unwrap().finish(shared_flags);

        let mut func = Function::new();
        let ebb = func.dfg.make_ebb();
        let arg64 = func.dfg.append_ebb_param(ebb, types::I64);
        let arg32 = func.dfg.append_ebb_param(ebb, types::I32);

        // Try to encode iadd_imm.i64 v1, -10.
        let inst64 = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg64,
            imm: immediates::Imm64::new(-10),
        };

        // ADDI is I/0b00100
        assert_eq!(
            encstr(&*isa, isa.encode(&func, &inst64, types::I64)),
            "Ii#04"
        );

        // Try to encode iadd_imm.i64 v1, -10000.
        let inst64_large = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg64,
            imm: immediates::Imm64::new(-10000),
        };

        // Immediate is out of range for ADDI.
        assert!(isa.encode(&func, &inst64_large, types::I64).is_err());

        // Create an iadd_imm.i32 which is encodable in RV64.
        let inst32 = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg32,
            imm: immediates::Imm64::new(10),
        };

        // ADDIW is I/0b00110
        assert_eq!(
            encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
            "Ii#06"
        );
    }

    // Same as above, but for RV32.
    #[test]
    fn test_32bitenc() {
        let mut shared_builder = settings::builder();
        shared_builder.set("is_64bit", "false").unwrap();
        let shared_flags = settings::Flags::new(&shared_builder);
        let isa = isa::lookup("riscv").unwrap().finish(shared_flags);

        let mut func = Function::new();
        let ebb = func.dfg.make_ebb();
        let arg64 = func.dfg.append_ebb_param(ebb, types::I64);
        let arg32 = func.dfg.append_ebb_param(ebb, types::I32);

        // Try to encode iadd_imm.i64 v1, -10.
        let inst64 = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg64,
            imm: immediates::Imm64::new(-10),
        };

        // In 32-bit mode, an i64 bit add should be narrowed.
        assert!(isa.encode(&func, &inst64, types::I64).is_err());

        // Try to encode iadd_imm.i64 v1, -10000.
        let inst64_large = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg64,
            imm: immediates::Imm64::new(-10000),
        };

        // In 32-bit mode, an i64 bit add should be narrowed.
        assert!(isa.encode(&func, &inst64_large, types::I64).is_err());

        // Create an iadd_imm.i32 which is encodable in RV32.
        let inst32 = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg32,
            imm: immediates::Imm64::new(10),
        };

        // ADDI is I/0b00100
        assert_eq!(
            encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
            "Ii#04"
        );

        // Create an imul.i32 which is encodable in RV32, but only when use_m is true.
        let mul32 = InstructionData::Binary {
            opcode: Opcode::Imul,
            args: [arg32, arg32],
        };

        // supports_m is off by default, so imul has no encoding.
        assert!(isa.encode(&func, &mul32, types::I32).is_err());
    }

    #[test]
    fn test_rv32m() {
        let mut shared_builder = settings::builder();
        shared_builder.set("is_64bit", "false").unwrap();
        let shared_flags = settings::Flags::new(&shared_builder);

        // Set the supports_m setting which in turn enables the use_m predicate that unlocks
        // encodings for imul.
        let mut isa_builder = isa::lookup("riscv").unwrap();
        isa_builder.enable("supports_m").unwrap();

        let isa = isa_builder.finish(shared_flags);

        let mut func = Function::new();
        let ebb = func.dfg.make_ebb();
        let arg32 = func.dfg.append_ebb_param(ebb, types::I32);

        // Create an imul.i32 which is encodable in RV32M.
        let mul32 = InstructionData::Binary {
            opcode: Opcode::Imul,
            args: [arg32, arg32],
        };
        assert_eq!(
            encstr(&*isa, isa.encode(&func, &mul32, types::I32)),
            "R#10c"
        );
    }
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
50
lib/codegen/src/isa/riscv/registers.rs
Normal file
50
lib/codegen/src/isa/riscv/registers.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
//! RISC-V register descriptions.
|
||||
|
||||
use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{FPR, GPR, INFO};
    use isa::RegUnit;
    use std::string::{String, ToString};

    #[test]
    fn unit_encodings() {
        // Integer registers occupy units 0-31, float registers units 32-63.
        assert_eq!(INFO.parse_regunit("x0"), Some(0));
        assert_eq!(INFO.parse_regunit("x31"), Some(31));
        assert_eq!(INFO.parse_regunit("f0"), Some(32));
        assert_eq!(INFO.parse_regunit("f31"), Some(63));

        // Register numbers past each bank's end don't parse.
        assert_eq!(INFO.parse_regunit("x32"), None);
        assert_eq!(INFO.parse_regunit("f32"), None);
    }

    #[test]
    fn unit_names() {
        // Render a register unit through `RegInfo::display_regunit`.
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%x0");
        assert_eq!(uname(1), "%x1");
        assert_eq!(uname(31), "%x31");
        assert_eq!(uname(32), "%f0");
        assert_eq!(uname(33), "%f1");
        assert_eq!(uname(63), "%f31");
        // Units outside every bank display as invalid.
        assert_eq!(uname(64), "%INVALID64");
    }

    #[test]
    fn classes() {
        // GPR and FPR are disjoint register classes.
        assert!(GPR.contains(GPR.unit(0)));
        assert!(GPR.contains(GPR.unit(31)));
        assert!(!FPR.contains(GPR.unit(0)));
        assert!(!FPR.contains(GPR.unit(31)));
        assert!(!GPR.contains(FPR.unit(0)));
        assert!(!GPR.contains(FPR.unit(31)));
        assert!(FPR.contains(FPR.unit(0)));
        assert!(FPR.contains(FPR.unit(31)));
    }
}
|
||||
54
lib/codegen/src/isa/riscv/settings.rs
Normal file
54
lib/codegen/src/isa/riscv/settings.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
//! RISC-V Settings.
|
||||
|
||||
use settings::{self, detail, Builder};
|
||||
use std::fmt;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `lib/codegen/meta/isa/riscv/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{builder, Flags};
    use settings::{self, Configurable};
    use std::string::ToString;

    #[test]
    fn display_default() {
        let shared = settings::Flags::new(&settings::builder());
        let b = builder();
        let f = Flags::new(&shared, &b);
        // Default RISC-V flag values as rendered by the generated Display impl.
        assert_eq!(
            f.to_string(),
            "[riscv]\n\
             supports_m = false\n\
             supports_a = false\n\
             supports_f = false\n\
             supports_d = false\n\
             enable_m = true\n\
             enable_e = false\n"
        );
        // Predicates are not part of the Display output.
        assert_eq!(f.full_float(), false);
    }

    #[test]
    fn predicates() {
        // full_float becomes true when both supports_f and supports_d are set...
        let shared = settings::Flags::new(&settings::builder());
        let mut b = builder();
        b.enable("supports_f").unwrap();
        b.enable("supports_d").unwrap();
        let f = Flags::new(&shared, &b);
        assert_eq!(f.full_float(), true);

        // ...but disabling the shared enable_simd flag turns it off again.
        let mut sb = settings::builder();
        sb.set("enable_simd", "false").unwrap();
        let shared = settings::Flags::new(&sb);
        let mut b = builder();
        b.enable("supports_f").unwrap();
        b.enable("supports_d").unwrap();
        let f = Flags::new(&shared, &b);
        assert_eq!(f.full_float(), false);
    }
}
|
||||
94
lib/codegen/src/isa/stack.rs
Normal file
94
lib/codegen/src/isa/stack.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
//! Low-level details of stack accesses.
|
||||
//!
|
||||
//! The `ir::StackSlots` type deals with stack slots and stack frame layout. The `StackRef` type
|
||||
//! defined in this module expresses the low-level details of accessing a stack slot from an
|
||||
//! encoded instruction.
|
||||
|
||||
use ir::StackSlot;
|
||||
use ir::stackslot::{StackOffset, StackSlotKind, StackSlots};
|
||||
|
||||
/// A method for referencing a stack slot in the current stack frame.
///
/// Stack slots are addressed with a constant offset from a base register. The base can be the
/// stack pointer, the frame pointer, or (in the future) a zone register pointing to an inner zone
/// of a large stack frame.
#[derive(Clone, Copy, Debug)]
pub struct StackRef {
    /// The base register to use for addressing.
    pub base: StackBase,

    /// Immediate offset from the base register to the first byte of the stack slot.
    pub offset: StackOffset,
}
|
||||
|
||||
impl StackRef {
|
||||
/// Get a reference to the stack slot `ss` using one of the base pointers in `mask`.
|
||||
pub fn masked(ss: StackSlot, mask: StackBaseMask, frame: &StackSlots) -> Option<StackRef> {
|
||||
// Try an SP-relative reference.
|
||||
if mask.contains(StackBase::SP) {
|
||||
return Some(StackRef::sp(ss, frame));
|
||||
}
|
||||
|
||||
// No reference possible with this mask.
|
||||
None
|
||||
}
|
||||
|
||||
/// Get a reference to `ss` using the stack pointer as a base.
|
||||
pub fn sp(ss: StackSlot, frame: &StackSlots) -> StackRef {
|
||||
let size = frame.frame_size.expect(
|
||||
"Stack layout must be computed before referencing stack slots",
|
||||
);
|
||||
let slot = &frame[ss];
|
||||
let offset = if slot.kind == StackSlotKind::OutgoingArg {
|
||||
// Outgoing argument slots have offsets relative to our stack pointer.
|
||||
slot.offset.unwrap()
|
||||
} else {
|
||||
// All other slots have offsets relative to our caller's stack frame.
|
||||
// Offset where SP is pointing. (All ISAs have stacks growing downwards.)
|
||||
let sp_offset = -(size as StackOffset);
|
||||
slot.offset.unwrap() - sp_offset
|
||||
};
|
||||
StackRef {
|
||||
base: StackBase::SP,
|
||||
offset,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generic base register for referencing stack slots.
///
/// Most ISAs have a stack pointer and an optional frame pointer, so provide generic names for
/// those two base pointers.
// The explicit discriminants double as bit positions in `StackBaseMask`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackBase {
    /// Use the stack pointer.
    SP = 0,

    /// Use the frame pointer (if one is present).
    FP = 1,

    /// Use an explicit zone pointer in a general-purpose register.
    ///
    /// This feature is not yet implemented.
    Zone = 2,
}
|
||||
|
||||
/// Bit mask of supported stack bases.
///
/// Many instruction encodings can use different base registers while others only work with the
/// stack pointer, say. A `StackBaseMask` is a bit mask of supported stack bases for a given
/// instruction encoding.
///
/// This behaves like a set of `StackBase` variants.
///
/// The internal representation as a `u8` is public because stack base masks are used in constant
/// tables generated from the Python encoding definitions.
// Bit `n` of the `u8` corresponds to the `StackBase` variant with discriminant `n`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct StackBaseMask(pub u8);
|
||||
|
||||
impl StackBaseMask {
|
||||
/// Check if this mask contains the `base` variant.
|
||||
pub fn contains(self, base: StackBase) -> bool {
|
||||
self.0 & (1 << base as usize) != 0
|
||||
}
|
||||
}
|
||||
371
lib/codegen/src/isa/x86/abi.rs
Normal file
371
lib/codegen/src/isa/x86/abi.rs
Normal file
@@ -0,0 +1,371 @@
|
||||
//! x86 ABI implementation.
|
||||
|
||||
use super::registers::{FPR, GPR, RU};
|
||||
use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use cursor::{Cursor, CursorPosition, EncCursor};
|
||||
use ir;
|
||||
use ir::immediates::Imm64;
|
||||
use ir::stackslot::{StackOffset, StackSize};
|
||||
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, CallConv, InstBuilder,
|
||||
ValueLoc};
|
||||
use isa::{RegClass, RegUnit, TargetIsa};
|
||||
use regalloc::RegisterSet;
|
||||
use result;
|
||||
use settings as shared_settings;
|
||||
use stack_layout::layout_stack;
|
||||
use std::i32;
|
||||
|
||||
/// Argument registers for x86-64
|
||||
static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];
|
||||
|
||||
/// Return value registers.
|
||||
static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx];
|
||||
|
||||
// State for assigning x86 argument locations.
struct Args {
    // Size of a pointer register in bytes.
    pointer_bytes: u32,
    // Size of a pointer register in bits.
    pointer_bits: u16,
    // Native pointer-sized integer type.
    pointer_type: ir::Type,
    // General-purpose argument registers, in assignment order (e.g. ARG_GPRS).
    gpr: &'static [RU],
    // Number of entries of `gpr` consumed so far.
    gpr_used: usize,
    // Maximum number of floating-point registers to use.
    // NOTE(review): consumed past the visible code — confirm at use sites.
    fpr_limit: usize,
    // Number of floating-point registers consumed so far.
    fpr_used: usize,
    // Current stack offset for arguments passed on the stack.
    offset: u32,
    // Calling convention being lowered (affects special-purpose arguments).
    call_conv: CallConv,
}
|
||||
|
||||
impl Args {
|
||||
fn new(bits: u16, gpr: &'static [RU], fpr_limit: usize, call_conv: CallConv) -> Args {
|
||||
Args {
|
||||
pointer_bytes: u32::from(bits) / 8,
|
||||
pointer_bits: bits,
|
||||
pointer_type: ir::Type::int(bits).unwrap(),
|
||||
gpr,
|
||||
gpr_used: 0,
|
||||
fpr_limit,
|
||||
fpr_used: 0,
|
||||
offset: 0,
|
||||
call_conv: call_conv,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgAssigner for Args {
|
||||
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
|
||||
let ty = arg.value_type;
|
||||
|
||||
// Check for a legal type.
|
||||
// We don't support SIMD yet, so break all vectors down.
|
||||
if ty.is_vector() {
|
||||
return ValueConversion::VectorSplit.into();
|
||||
}
|
||||
|
||||
// Large integers and booleans are broken down to fit in a register.
|
||||
if !ty.is_float() && ty.bits() > self.pointer_bits {
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
|
||||
// Small integers are extended to the size of a pointer register.
|
||||
if ty.is_int() && ty.bits() < self.pointer_bits {
|
||||
match arg.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
|
||||
ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
|
||||
}
|
||||
}
|
||||
|
||||
// Handle special-purpose arguments.
|
||||
if ty.is_int() && self.call_conv == CallConv::SpiderWASM {
|
||||
match arg.purpose {
|
||||
// This is SpiderMonkey's `WasmTlsReg`.
|
||||
ArgumentPurpose::VMContext => {
|
||||
return ArgumentLoc::Reg(if self.pointer_bits == 64 {
|
||||
RU::r14
|
||||
} else {
|
||||
RU::rsi
|
||||
} as RegUnit).into()
|
||||
}
|
||||
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
||||
ArgumentPurpose::SignatureId => return ArgumentLoc::Reg(RU::rbx as RegUnit).into(),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to use a GPR.
|
||||
if !ty.is_float() && self.gpr_used < self.gpr.len() {
|
||||
let reg = self.gpr[self.gpr_used] as RegUnit;
|
||||
self.gpr_used += 1;
|
||||
return ArgumentLoc::Reg(reg).into();
|
||||
}
|
||||
|
||||
// Try to use an FPR.
|
||||
if ty.is_float() && self.fpr_used < self.fpr_limit {
|
||||
let reg = FPR.unit(self.fpr_used);
|
||||
self.fpr_used += 1;
|
||||
return ArgumentLoc::Reg(reg).into();
|
||||
}
|
||||
|
||||
// Assign a stack location.
|
||||
let loc = ArgumentLoc::Stack(self.offset as i32);
|
||||
self.offset += self.pointer_bytes;
|
||||
debug_assert!(self.offset <= i32::MAX as u32);
|
||||
loc.into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalize `sig`.
|
||||
pub fn legalize_signature(sig: &mut ir::Signature, flags: &shared_settings::Flags, _current: bool) {
|
||||
let bits;
|
||||
let mut args;
|
||||
|
||||
if flags.is_64bit() {
|
||||
bits = 64;
|
||||
args = Args::new(bits, &ARG_GPRS, 8, sig.call_conv);
|
||||
} else {
|
||||
bits = 32;
|
||||
args = Args::new(bits, &[], 0, sig.call_conv);
|
||||
}
|
||||
|
||||
legalize_args(&mut sig.params, &mut args);
|
||||
|
||||
let mut rets = Args::new(bits, &RET_GPRS, 2, sig.call_conv);
|
||||
legalize_args(&mut sig.returns, &mut rets);
|
||||
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
if ty.is_int() || ty.is_bool() {
|
||||
GPR
|
||||
} else {
|
||||
FPR
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function, flags: &shared_settings::Flags) -> RegisterSet {
|
||||
let mut regs = RegisterSet::new();
|
||||
regs.take(GPR, RU::rsp as RegUnit);
|
||||
regs.take(GPR, RU::rbp as RegUnit);
|
||||
|
||||
// 32-bit arch only has 8 registers.
|
||||
if !flags.is_64bit() {
|
||||
for i in 8..16 {
|
||||
regs.take(GPR, GPR.unit(i));
|
||||
regs.take(FPR, FPR.unit(i));
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
/// Get the set of callee-saved registers.
|
||||
fn callee_saved_gprs(flags: &shared_settings::Flags) -> &'static [RU] {
|
||||
if flags.is_64bit() {
|
||||
&[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
|
||||
} else {
|
||||
&[RU::rbx, RU::rsi, RU::rdi]
|
||||
}
|
||||
}
|
||||
|
||||
fn callee_saved_gprs_used(flags: &shared_settings::Flags, func: &ir::Function) -> RegisterSet {
|
||||
let mut all_callee_saved = RegisterSet::empty();
|
||||
for reg in callee_saved_gprs(flags) {
|
||||
all_callee_saved.free(GPR, *reg as RegUnit);
|
||||
}
|
||||
|
||||
let mut used = RegisterSet::empty();
|
||||
for value_loc in func.locations.values() {
|
||||
// Note that `value_loc` here contains only a single unit of a potentially multi-unit
|
||||
// register. We don't use registers that overlap each other in the x86 ISA, but in others
|
||||
// we do. So this should not be blindly reused.
|
||||
if let ValueLoc::Reg(ru) = *value_loc {
|
||||
if !used.is_avail(GPR, ru) {
|
||||
used.free(GPR, ru);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// regmove and regfill instructions may temporarily divert values into other registers,
|
||||
// and these are not reflected in `func.locations`. Scan the function for such instructions
|
||||
// and note which callee-saved registers they use.
|
||||
//
|
||||
// TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
|
||||
// to avoid this step.
|
||||
for ebb in &func.layout {
|
||||
for inst in func.layout.ebb_insts(ebb) {
|
||||
match func.dfg[inst] {
|
||||
ir::instructions::InstructionData::RegMove { dst, .. } |
|
||||
ir::instructions::InstructionData::RegFill { dst, .. } => {
|
||||
if !used.is_avail(GPR, dst) {
|
||||
used.free(GPR, dst);
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
used.intersect(&all_callee_saved);
|
||||
return used;
|
||||
}
|
||||
|
||||
pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
|
||||
match func.signature.call_conv {
|
||||
ir::CallConv::SystemV => system_v_prologue_epilogue(func, isa),
|
||||
ir::CallConv::SpiderWASM => spiderwasm_prologue_epilogue(func, isa),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn spiderwasm_prologue_epilogue(
|
||||
func: &mut ir::Function,
|
||||
isa: &TargetIsa,
|
||||
) -> result::CtonResult {
|
||||
// Spiderwasm on 32-bit x86 always aligns its stack pointer to 16 bytes.
|
||||
let stack_align = 16;
|
||||
let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
|
||||
let bytes = StackSize::from(isa.flags().spiderwasm_prologue_words()) * word_size;
|
||||
|
||||
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
|
||||
ss.offset = Some(-(bytes as StackOffset));
|
||||
func.stack_slots.push(ss);
|
||||
|
||||
layout_stack(&mut func.stack_slots, stack_align)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Insert a System V-compatible prologue and epilogue.
|
||||
pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
|
||||
// The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
|
||||
// newer versions use a 16-byte aligned stack pointer.
|
||||
let stack_align = 16;
|
||||
let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
|
||||
let csr_type = if isa.flags().is_64bit() {
|
||||
ir::types::I64
|
||||
} else {
|
||||
ir::types::I32
|
||||
};
|
||||
|
||||
let csrs = callee_saved_gprs_used(isa.flags(), func);
|
||||
|
||||
// The reserved stack area is composed of:
|
||||
// return address + frame pointer + all callee-saved registers
|
||||
//
|
||||
// Pushing the return address is an implicit function of the `call`
|
||||
// instruction. Each of the others we will then push explicitly. Then we
|
||||
// will adjust the stack pointer to make room for the rest of the required
|
||||
// space for this frame.
|
||||
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size as usize) as i32;
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::IncomingArg,
|
||||
size: csr_stack_size as u32,
|
||||
offset: Some(-csr_stack_size),
|
||||
});
|
||||
|
||||
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
|
||||
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
||||
|
||||
// Add CSRs to function signature
|
||||
let fp_arg = ir::AbiParam::special_reg(
|
||||
csr_type,
|
||||
ir::ArgumentPurpose::FramePointer,
|
||||
RU::rbp as RegUnit,
|
||||
);
|
||||
func.signature.params.push(fp_arg);
|
||||
func.signature.returns.push(fp_arg);
|
||||
|
||||
for csr in csrs.iter(GPR) {
|
||||
let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, csr);
|
||||
func.signature.params.push(csr_arg);
|
||||
func.signature.returns.push(csr_arg);
|
||||
}
|
||||
|
||||
// Set up the cursor and insert the prologue
|
||||
let entry_ebb = func.layout.entry_block().expect("missing entry block");
|
||||
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
|
||||
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, &csrs);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, &csrs);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Insert the prologue for a given function.
|
||||
fn insert_system_v_prologue(
|
||||
pos: &mut EncCursor,
|
||||
stack_size: i64,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
// Append param to entry EBB
|
||||
let ebb = pos.current_ebb().expect("missing ebb under cursor");
|
||||
let fp = pos.func.dfg.append_ebb_param(ebb, csr_type);
|
||||
pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
|
||||
|
||||
pos.ins().x86_push(fp);
|
||||
pos.ins().copy_special(
|
||||
RU::rsp as RegUnit,
|
||||
RU::rbp as RegUnit,
|
||||
);
|
||||
|
||||
for reg in csrs.iter(GPR) {
|
||||
// Append param to entry EBB
|
||||
let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type);
|
||||
|
||||
// Assign it a location
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
|
||||
// Remember it so we can push it momentarily
|
||||
pos.ins().x86_push(csr_arg);
|
||||
}
|
||||
|
||||
if stack_size > 0 {
|
||||
pos.ins().adjust_sp_imm(Imm64::new(-stack_size));
|
||||
}
|
||||
}
|
||||
|
||||
/// Find all `return` instructions and insert epilogues before them.
|
||||
fn insert_system_v_epilogues(
|
||||
pos: &mut EncCursor,
|
||||
stack_size: i64,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
while let Some(ebb) = pos.next_ebb() {
|
||||
pos.goto_last_inst(ebb);
|
||||
if let Some(inst) = pos.current_inst() {
|
||||
if pos.func.dfg[inst].opcode().is_return() {
|
||||
insert_system_v_epilogue(inst, stack_size, pos, csr_type, csrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert an epilogue given a specific `return` instruction.
|
||||
fn insert_system_v_epilogue(
|
||||
inst: ir::Inst,
|
||||
stack_size: i64,
|
||||
pos: &mut EncCursor,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
if stack_size > 0 {
|
||||
pos.ins().adjust_sp_imm(Imm64::new(stack_size));
|
||||
}
|
||||
|
||||
// Pop all the callee-saved registers, stepping backward each time to
|
||||
// preserve the correct order.
|
||||
let fp_ret = pos.ins().x86_pop(csr_type);
|
||||
pos.prev_inst();
|
||||
|
||||
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
|
||||
pos.func.dfg.append_inst_arg(inst, fp_ret);
|
||||
|
||||
for reg in csrs.iter(GPR) {
|
||||
let csr_ret = pos.ins().x86_pop(csr_type);
|
||||
pos.prev_inst();
|
||||
|
||||
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, csr_ret);
|
||||
}
|
||||
}
|
||||
300
lib/codegen/src/isa/x86/binemit.rs
Normal file
300
lib/codegen/src/isa/x86/binemit.rs
Normal file
@@ -0,0 +1,300 @@
|
||||
//! Emitting binary x86 machine code.
|
||||
|
||||
use super::registers::RU;
|
||||
use binemit::{bad_encoding, CodeSink, Reloc};
|
||||
use ir::condcodes::{CondCode, FloatCC, IntCC};
|
||||
use ir::{Ebb, Function, Inst, InstructionData, Opcode, TrapCode};
|
||||
use isa::{RegUnit, StackBase, StackBaseMask, StackRef};
|
||||
use regalloc::RegDiversions;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
|
||||
|
||||
// Convert a stack base to the corresponding register.
|
||||
fn stk_base(base: StackBase) -> RegUnit {
|
||||
let ru = match base {
|
||||
StackBase::SP => RU::rsp,
|
||||
StackBase::FP => RU::rbp,
|
||||
StackBase::Zone => unimplemented!(),
|
||||
};
|
||||
ru as RegUnit
|
||||
}
|
||||
|
||||
// Mandatory prefix bytes for Mp* opcodes, indexed by `pp - 1` (pp = 1, 2, 3).
const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];

// Second byte of a three-byte opcode, indexed by `mm - 2` (mm = 0b10, 0b11).
const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];

// A REX prefix with no bits set: 0b0100WRXB.
const BASE_REX: u8 = 0b0100_0000;
|
||||
|
||||
// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
|
||||
// This is used for instructions that encode a register in the low 3 bits of the opcode and for
|
||||
// instructions that use the ModR/M `reg` field for something else.
|
||||
fn rex1(reg_b: RegUnit) -> u8 {
|
||||
let b = ((reg_b >> 3) & 1) as u8;
|
||||
BASE_REX | b
|
||||
}
|
||||
|
||||
// Create a dual-register REX prefix, setting:
|
||||
//
|
||||
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
|
||||
// REX.R = bit 3 of reg register.
|
||||
fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
|
||||
let b = ((rm >> 3) & 1) as u8;
|
||||
let r = ((reg >> 3) & 1) as u8;
|
||||
BASE_REX | b | (r << 2)
|
||||
}
|
||||
|
||||
// Emit a REX prefix.
|
||||
//
|
||||
// The R, X, and B bits are computed from registers using the functions above. The W bit is
|
||||
// extracted from `bits`.
|
||||
fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(rex & 0xf8, BASE_REX);
|
||||
let w = ((bits >> 15) & 1) as u8;
|
||||
sink.put1(rex | (w << 3));
|
||||
}
|
||||
|
||||
// Emit a single-byte opcode with no REX prefix.
|
||||
fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit a single-byte opcode with REX prefix.
|
||||
fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for Op1*");
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode: 0F XX
|
||||
fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode: 0F XX with REX prefix.
|
||||
fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix.
|
||||
fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix and REX.
|
||||
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix.
|
||||
fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
|
||||
fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for Mp2*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
|
||||
fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
|
||||
let mm = (bits >> 10) & 3;
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
|
||||
fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
let mm = (bits >> 10) & 3;
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit a ModR/M byte for reg-reg operands.
|
||||
fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b11000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a ModR/M byte where the reg bits are part of the opcode.
|
||||
fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
|
||||
let reg = (bits >> 12) as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b11000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset.
|
||||
/// Registers %rsp and %rbp are invalid for `rm`, %rsp indicates a SIB byte, and %rbp indicates an
|
||||
/// absolute immediate 32-bit address.
|
||||
fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b00000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 Mod/RM byte, with a rip-relative displacement in 64-bit mode. Effective address
|
||||
/// is calculated by adding displacement to 64-bit rip of next instruction. See intel Sw dev manual
|
||||
/// section 2.2.1.6.
|
||||
fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_rm(0b101, reg, sink)
|
||||
}
|
||||
|
||||
/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
|
||||
/// displacement.
|
||||
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
|
||||
fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b01000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
|
||||
/// displacement.
|
||||
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
|
||||
fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b10000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M byte indicating that a SIB byte is present.
|
||||
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_disp32(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a SIB byte with a base register and no scale+index.
|
||||
fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
|
||||
let base = base as u8 & 7;
|
||||
// SIB SS_III_BBB.
|
||||
let mut b = 0b00_100_000;
|
||||
b |= base;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for an integer condition code.
|
||||
///
|
||||
/// Add this offset to a base opcode for:
|
||||
///
|
||||
/// ---- 0x70: Short conditional branch.
|
||||
/// 0x0f 0x80: Long conditional branch.
|
||||
/// 0x0f 0x90: SetCC.
|
||||
///
|
||||
fn icc2opc(cond: IntCC) -> u16 {
|
||||
use ir::condcodes::IntCC::*;
|
||||
match cond {
|
||||
// 0x0 = Overflow.
|
||||
// 0x1 = !Overflow.
|
||||
UnsignedLessThan => 0x2,
|
||||
UnsignedGreaterThanOrEqual => 0x3,
|
||||
Equal => 0x4,
|
||||
NotEqual => 0x5,
|
||||
UnsignedLessThanOrEqual => 0x6,
|
||||
UnsignedGreaterThan => 0x7,
|
||||
// 0x8 = Sign.
|
||||
// 0x9 = !Sign.
|
||||
// 0xa = Parity even.
|
||||
// 0xb = Parity odd.
|
||||
SignedLessThan => 0xc,
|
||||
SignedGreaterThanOrEqual => 0xd,
|
||||
SignedLessThanOrEqual => 0xe,
|
||||
SignedGreaterThan => 0xf,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for a floating point condition code.
|
||||
///
|
||||
/// The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this:
|
||||
///
|
||||
/// ZPC OSA
|
||||
/// UN 111 000
|
||||
/// GT 000 000
|
||||
/// LT 001 000
|
||||
/// EQ 100 000
|
||||
///
|
||||
/// Not all floating point condition codes are supported.
|
||||
fn fcc2opc(cond: FloatCC) -> u16 {
|
||||
use ir::condcodes::FloatCC::*;
|
||||
match cond {
|
||||
Ordered => 0xb, // EQ|LT|GT => *np (P=0)
|
||||
Unordered => 0xa, // UN => *p (P=1)
|
||||
OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0),
|
||||
UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1)
|
||||
GreaterThan => 0x7, // GT => *a (C=0&Z=0)
|
||||
GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0)
|
||||
UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1)
|
||||
UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
|
||||
Equal | // EQ
|
||||
NotEqual | // UN|LT|GT
|
||||
LessThan | // LT
|
||||
LessThanOrEqual | // LT|EQ
|
||||
UnorderedOrGreaterThan | // UN|GT
|
||||
UnorderedOrGreaterThanOrEqual // UN|GT|EQ
|
||||
=> panic!("{} not supported", cond),
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit a single-byte branch displacement to `destination`.
|
||||
fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
|
||||
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
|
||||
sink.put1(delta as u8);
|
||||
}
|
||||
|
||||
/// Emit a single-byte branch displacement to `destination`.
|
||||
fn disp4<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
|
||||
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
|
||||
sink.put4(delta);
|
||||
}
|
||||
509
lib/codegen/src/isa/x86/enc_tables.rs
Normal file
509
lib/codegen/src/isa/x86/enc_tables.rs
Normal file
@@ -0,0 +1,509 @@
|
||||
//! Encoding tables for x86 ISAs.
|
||||
|
||||
use super::registers::*;
|
||||
use bitset::BitSet;
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::condcodes::IntCC;
|
||||
use ir::{self, InstBuilder};
|
||||
use isa;
|
||||
use isa::constraints::*;
|
||||
use isa::enc_tables::*;
|
||||
use isa::encoding::RecipeSizing;
|
||||
use predicates;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
|
||||
|
||||
/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
|
||||
fn expand_sdivrem(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
isa: &isa::TargetIsa,
|
||||
) {
|
||||
let (x, y, is_srem) = match func.dfg[inst] {
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Sdiv,
|
||||
args,
|
||||
} => (args[0], args[1], false),
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Srem,
|
||||
args,
|
||||
} => (args[0], args[1], true),
|
||||
_ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
let avoid_div_traps = isa.flags().avoid_div_traps();
|
||||
let old_ebb = func.layout.pp_ebb(inst);
|
||||
let result = func.dfg.first_result(inst);
|
||||
let ty = func.dfg.value_type(result);
|
||||
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
pos.func.dfg.clear_results(inst);
|
||||
|
||||
// If we can tolerate native division traps, sdiv doesn't need branching.
|
||||
if !avoid_div_traps && !is_srem {
|
||||
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
|
||||
pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
|
||||
pos.remove_inst();
|
||||
return;
|
||||
}
|
||||
|
||||
// EBB handling the -1 divisor case.
|
||||
let minus_one = pos.func.dfg.make_ebb();
|
||||
|
||||
// Final EBB with one argument representing the final result value.
|
||||
let done = pos.func.dfg.make_ebb();
|
||||
|
||||
// Move the `inst` result value onto the `done` EBB.
|
||||
pos.func.dfg.attach_ebb_param(done, result);
|
||||
|
||||
// Start by checking for a -1 divisor which needs to be handled specially.
|
||||
let is_m1 = pos.ins().ifcmp_imm(y, -1);
|
||||
pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
|
||||
|
||||
// Put in an explicit division-by-zero trap if the environment requires it.
|
||||
if avoid_div_traps {
|
||||
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
|
||||
}
|
||||
|
||||
// Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
|
||||
// by zero.
|
||||
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
|
||||
let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
|
||||
let divres = if is_srem { rem } else { quot };
|
||||
pos.ins().jump(done, &[divres]);
|
||||
|
||||
// Now deal with the -1 divisor case.
|
||||
pos.insert_ebb(minus_one);
|
||||
let m1_result = if is_srem {
|
||||
// x % -1 = 0.
|
||||
pos.ins().iconst(ty, 0)
|
||||
} else {
|
||||
// Explicitly check for overflow: Trap when x == INT_MIN.
|
||||
debug_assert!(avoid_div_traps, "Native trapping divide handled above");
|
||||
let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
|
||||
pos.ins().trapif(
|
||||
IntCC::Equal,
|
||||
f,
|
||||
ir::TrapCode::IntegerOverflow,
|
||||
);
|
||||
// x / -1 = -x.
|
||||
pos.ins().irsub_imm(x, 0)
|
||||
};
|
||||
|
||||
// Recycle the original instruction as a jump.
|
||||
pos.func.dfg.replace(inst).jump(done, &[m1_result]);
|
||||
|
||||
// Finally insert a label for the completion.
|
||||
pos.next_inst();
|
||||
pos.insert_ebb(done);
|
||||
|
||||
cfg.recompute_ebb(pos.func, old_ebb);
|
||||
cfg.recompute_ebb(pos.func, minus_one);
|
||||
cfg.recompute_ebb(pos.func, done);
|
||||
}
|
||||
|
||||
/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
|
||||
fn expand_udivrem(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
isa: &isa::TargetIsa,
|
||||
) {
|
||||
let (x, y, is_urem) = match func.dfg[inst] {
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Udiv,
|
||||
args,
|
||||
} => (args[0], args[1], false),
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Urem,
|
||||
args,
|
||||
} => (args[0], args[1], true),
|
||||
_ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
let avoid_div_traps = isa.flags().avoid_div_traps();
|
||||
let result = func.dfg.first_result(inst);
|
||||
let ty = func.dfg.value_type(result);
|
||||
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
pos.func.dfg.clear_results(inst);
|
||||
|
||||
// Put in an explicit division-by-zero trap if the environment requires it.
|
||||
if avoid_div_traps {
|
||||
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
|
||||
}
|
||||
|
||||
// Now it is safe to execute the `x86_udivmodx` instruction.
|
||||
let xhi = pos.ins().iconst(ty, 0);
|
||||
let reuse = if is_urem {
|
||||
[None, Some(result)]
|
||||
} else {
|
||||
[Some(result), None]
|
||||
};
|
||||
pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
|
||||
pos.remove_inst();
|
||||
}
|
||||
|
||||
/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax`
|
||||
/// instructions.
|
||||
fn expand_minmax(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
_isa: &isa::TargetIsa,
|
||||
) {
|
||||
use ir::condcodes::FloatCC;
|
||||
|
||||
let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Fmin,
|
||||
args,
|
||||
} => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Fmax,
|
||||
args,
|
||||
} => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
|
||||
_ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
let old_ebb = func.layout.pp_ebb(inst);
|
||||
|
||||
// We need to handle the following conditions, depending on how x and y compare:
|
||||
//
|
||||
// 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
|
||||
// 2. EQ: We need to use `bitwise_opc` to make sure that
|
||||
// fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
|
||||
// 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
|
||||
|
||||
// EBB handling case 3) where one operand is NaN.
|
||||
let uno_ebb = func.dfg.make_ebb();
|
||||
|
||||
// EBB that handles the unordered or equal cases 2) and 3).
|
||||
let ueq_ebb = func.dfg.make_ebb();
|
||||
|
||||
// Final EBB with one argument representing the final result value.
|
||||
let done = func.dfg.make_ebb();
|
||||
|
||||
// The basic blocks are laid out to minimize branching for the common cases:
|
||||
//
|
||||
// 1) One branch not taken, one jump.
|
||||
// 2) One branch taken.
|
||||
// 3) Two branches taken, one jump.
|
||||
|
||||
// Move the `inst` result value onto the `done` EBB.
|
||||
let result = func.dfg.first_result(inst);
|
||||
let ty = func.dfg.value_type(result);
|
||||
func.dfg.clear_results(inst);
|
||||
func.dfg.attach_ebb_param(done, result);
|
||||
|
||||
// Test for case 1) ordered and not equal.
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
|
||||
pos.ins().brnz(cmp_ueq, ueq_ebb, &[]);
|
||||
|
||||
// Handle the common ordered, not equal (LT|GT) case.
|
||||
let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
|
||||
let one_result = pos.func.dfg.first_result(one_inst);
|
||||
pos.ins().jump(done, &[one_result]);
|
||||
|
||||
// Case 3) Unordered.
|
||||
// We know that at least one operand is a NaN that needs to be propagated. We simply use an
|
||||
// `fadd` instruction which has the same NaN propagation semantics.
|
||||
pos.insert_ebb(uno_ebb);
|
||||
let uno_result = pos.ins().fadd(x, y);
|
||||
pos.ins().jump(done, &[uno_result]);
|
||||
|
||||
// Case 2) or 3).
|
||||
pos.insert_ebb(ueq_ebb);
|
||||
// Test for case 3) (UN) one value is NaN.
|
||||
// TODO: When we get support for flag values, we can reuse the above comparison.
|
||||
let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
|
||||
pos.ins().brnz(cmp_uno, uno_ebb, &[]);
|
||||
|
||||
// We are now in case 2) where x and y compare EQ.
|
||||
// We need a bitwise operation to get the sign right.
|
||||
let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
|
||||
let bw_result = pos.func.dfg.first_result(bw_inst);
|
||||
// This should become a fall-through for this second most common case.
|
||||
// Recycle the original instruction as a jump.
|
||||
pos.func.dfg.replace(inst).jump(done, &[bw_result]);
|
||||
|
||||
// Finally insert a label for the completion.
|
||||
pos.next_inst();
|
||||
pos.insert_ebb(done);
|
||||
|
||||
cfg.recompute_ebb(pos.func, old_ebb);
|
||||
cfg.recompute_ebb(pos.func, ueq_ebb);
|
||||
cfg.recompute_ebb(pos.func, uno_ebb);
|
||||
cfg.recompute_ebb(pos.func, done);
|
||||
}
|
||||
|
||||
/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
/// i64 with a pattern, the rest needs more code.
fn expand_fcvt_from_uint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use ir::condcodes::IntCC;

    // Extract the input value; this expansion only applies to `fcvt_from_uint`.
    let x;
    match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtFromUint,
            arg,
        } => x = arg,
        _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
    }
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Conversion from unsigned 32-bit is easy on x86-64: zero-extend to 64 bits and use the
    // signed conversion, which is exact for all u32 values.
    // TODO: This should be guarded by an ISA check.
    if xty == ir::types::I32 {
        let wide = pos.ins().uextend(ir::types::I64, x);
        pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
        return;
    }

    let old_ebb = pos.func.layout.pp_ebb(inst);

    // EBB handling the case where x < 0 (i.e. the top bit is set when viewed as signed).
    let neg_ebb = pos.func.dfg.make_ebb();

    // Final EBB with one argument representing the final result value.
    let done = pos.func.dfg.make_ebb();

    // Move the `inst` result value onto the `done` EBB.
    pos.func.dfg.clear_results(inst);
    pos.func.dfg.attach_ebb_param(done, result);

    // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction.
    let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
    pos.ins().brnz(is_neg, neg_ebb, &[]);

    // Easy case: just use a signed conversion.
    let posres = pos.ins().fcvt_from_sint(ty, x);
    pos.ins().jump(done, &[posres]);

    // Now handle the negative case.
    pos.insert_ebb(neg_ebb);

    // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it
    // back up on the FP side.
    // OR-ing the shifted-out LSB back in preserves it as a sticky bit, so the final doubling via
    // `fadd` rounds the same way a direct conversion would (round-to-odd trick —
    // NOTE(review): confirm against the reference lowering).
    let ihalf = pos.ins().ushr_imm(x, 1);
    let lsb = pos.ins().band_imm(x, 1);
    let ifinal = pos.ins().bor(ihalf, lsb);
    let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
    let negres = pos.ins().fadd(fhalf, fhalf);

    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[negres]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    // The expansion introduced new branches, so the flow graph entries for all touched EBBs must
    // be recomputed.
    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, neg_ebb);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// Expand `fcvt_to_sint` into `x86_cvtt2si` plus explicit checks.
///
/// The hardware conversion doesn't trap on NaN or overflow; it produces INT_MIN instead. This
/// expansion keeps the fast path (result != INT_MIN) branch-free and inserts traps with the
/// proper trap codes (`BadConversionToInteger` for NaN, `IntegerOverflow` for out-of-range) for
/// the slow path.
fn expand_fcvt_to_sint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use ir::condcodes::{FloatCC, IntCC};
    use ir::immediates::{Ieee32, Ieee64};

    let x;
    match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToSint,
            arg,
        } => x = arg,
        _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
    }
    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    // Final EBB after the bad value checks.
    let done = func.dfg.make_ebb();

    // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow.
    // It produces an INT_MIN result instead.
    func.dfg.replace(inst).x86_cvtt2si(ty, x);

    let mut pos = FuncCursor::new(func).after_inst(inst);
    pos.use_srcloc(inst);

    // Fast path: any result other than INT_MIN (== 1 << (bits-1)) is known good.
    let is_done = pos.ins().icmp_imm(
        IntCC::NotEqual,
        result,
        1 << (ty.lane_bits() - 1),
    );
    pos.ins().brnz(is_done, done, &[]);

    // We now have the following possibilities:
    //
    // 1. INT_MIN was actually the correct conversion result.
    // 2. The input was NaN -> trap bad_toint
    // 3. The input was out of range -> trap int_ovf
    //

    // Check for NaN (a value is unordered with itself iff it is NaN).
    let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
    pos.ins().trapnz(
        is_nan,
        ir::TrapCode::BadConversionToInteger,
    );

    // Check for case 1: INT_MIN is the correct result.
    // Determine the smallest floating point number that would convert to INT_MIN.
    let mut overflow_cc = FloatCC::LessThan;
    let output_bits = ty.lane_bits();
    let flimit = match xty {
        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
        ir::types::F32 => {
            pos.ins().f32const(if output_bits < 32 {
                overflow_cc = FloatCC::LessThanOrEqual;
                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee32::pow2(output_bits - 1).neg()
            })
        }
        ir::types::F64 => {
            // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
            // there are values less than -2^(N-1) that convert correctly to INT_MIN.
            pos.ins().f64const(if output_bits < 64 {
                overflow_cc = FloatCC::LessThanOrEqual;
                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee64::pow2(output_bits - 1).neg()
            })
        }
        _ => panic!("Can't convert {}", xty),
    };
    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);

    // Finally, we could have a positive value that is too large.
    // Any x >= 0.0 reaching this point must have overflowed, since a valid non-negative input
    // would not have produced INT_MIN.
    let fzero = match xty {
        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
        _ => panic!("Can't convert {}", xty),
    };
    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);

    pos.ins().jump(done, &[]);
    pos.insert_ebb(done);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// Expand `fcvt_to_uint`, which x86 has no native instruction for.
///
/// Inputs below 2^(N-1) are converted with the signed `x86_cvtt2si`; inputs at or above 2^(N-1)
/// are first biased down by 2^(N-1), converted, and then have the bias added back as an integer.
/// NaN traps with `BadConversionToInteger`; negative or too-large inputs trap with
/// `IntegerOverflow`.
fn expand_fcvt_to_uint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use ir::condcodes::{FloatCC, IntCC};
    use ir::immediates::{Ieee32, Ieee64};

    let x;
    match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToUint,
            arg,
        } => x = arg,
        _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
    }
    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    // EBB handling numbers >= 2^(N-1).
    let large = func.dfg.make_ebb();

    // Final EBB after the bad value checks.
    let done = func.dfg.make_ebb();

    // Move the `inst` result value onto the `done` EBB.
    func.dfg.clear_results(inst);
    func.dfg.attach_ebb_param(done, result);

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
    // the destination integer type.
    let pow2nm1 = match xty {
        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
        _ => panic!("Can't convert {}", xty),
    };
    let is_large = pos.ins().ffcmp(x, pow2nm1);
    pos.ins().brff(
        FloatCC::GreaterThanOrEqual,
        is_large,
        large,
        &[],
    );

    // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the
    // previous comparison.
    pos.ins().trapff(
        FloatCC::Unordered,
        is_large,
        ir::TrapCode::BadConversionToInteger,
    );

    // Now we know that x < 2^(N-1) and not NaN.
    let sres = pos.ins().x86_cvtt2si(ty, x);
    // A negative signed result here means x was negative, which is out of range for unsigned.
    let is_neg = pos.ins().ifcmp_imm(sres, 0);
    pos.ins().brif(
        IntCC::SignedGreaterThanOrEqual,
        is_neg,
        done,
        &[sres],
    );
    pos.ins().trap(ir::TrapCode::IntegerOverflow);

    // Handle the case where x >= 2^(N-1) and not NaN.
    // Bias x down into signed range, convert, then add 2^(N-1) back as an integer.
    pos.insert_ebb(large);
    let adjx = pos.ins().fsub(x, pow2nm1);
    let lres = pos.ins().x86_cvtt2si(ty, adjx);
    // A negative converted value means the biased input was still out of range (or the
    // conversion produced the INT_MIN sentinel), so trap on overflow.
    let is_neg = pos.ins().ifcmp_imm(lres, 0);
    pos.ins().trapif(
        IntCC::SignedLessThan,
        is_neg,
        ir::TrapCode::IntegerOverflow,
    );
    let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));

    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[lfinal]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, large);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
129
lib/codegen/src/isa/x86/mod.rs
Normal file
129
lib/codegen/src/isa/x86/mod.rs
Normal file
@@ -0,0 +1,129 @@
|
||||
//! x86 Instruction Set Architectures.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
use binemit::{emit_function, CodeSink, MemoryCodeSink};
|
||||
use ir;
|
||||
use isa::Builder as IsaBuilder;
|
||||
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
|
||||
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use regalloc;
|
||||
use result;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
use timing;
|
||||
|
||||
/// An x86 target ISA instance carrying the settings chosen at construction time.
#[allow(dead_code)]
struct Isa {
    // Settings shared by all target ISAs.
    shared_flags: shared_settings::Flags,
    // x86-specific settings (see `settings.rs`).
    isa_flags: settings::Flags,
    // Level 1 encoding table for the selected CPU mode; `isa_constructor` picks `LEVEL1_I64` or
    // `LEVEL1_I32` based on the 64-bit flag.
    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
|
||||
|
||||
/// Get an ISA builder for creating x86 targets.
pub fn isa_builder() -> IsaBuilder {
    IsaBuilder {
        // Start from the default x86 settings builder; `isa_constructor` is invoked once all
        // settings have been configured.
        setup: settings::builder(),
        constructor: isa_constructor,
    }
}
|
||||
|
||||
fn isa_constructor(
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: &shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
let level1 = if shared_flags.is_64bit() {
|
||||
&enc_tables::LEVEL1_I64[..]
|
||||
} else {
|
||||
&enc_tables::LEVEL1_I32[..]
|
||||
};
|
||||
Box::new(Isa {
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
    fn name(&self) -> &'static str {
        "x86"
    }

    fn flags(&self) -> &shared_settings::Flags {
        &self.shared_flags
    }

    // x86 legalizer expansions create CPU flag values (`ffcmp`/`brff`/`trapff` in
    // `enc_tables`), so report that this ISA uses them.
    fn uses_cpu_flags(&self) -> bool {
        true
    }

    fn register_info(&self) -> RegInfo {
        registers::INFO.clone()
    }

    fn encoding_info(&self) -> EncInfo {
        enc_tables::INFO.clone()
    }

    // Look up legal encodings for `inst` in the generated tables, starting from the level 1
    // table selected for the current CPU mode.
    fn legal_encodings<'a>(
        &'a self,
        func: &'a ir::Function,
        inst: &'a ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Encodings<'a> {
        lookup_enclist(
            ctrl_typevar,
            inst,
            func,
            self.cpumode,
            &enc_tables::LEVEL2[..],
            &enc_tables::ENCLISTS[..],
            &enc_tables::LEGALIZE_ACTIONS[..],
            &enc_tables::RECIPE_PREDICATES[..],
            &enc_tables::INST_PREDICATES[..],
            self.isa_flags.predicate_view(),
        )
    }

    // ABI concerns are delegated to the `abi` submodule.
    fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
        abi::legalize_signature(sig, &self.shared_flags, current)
    }

    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
        abi::regclass_for_abi_type(ty)
    }

    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
        abi::allocatable_registers(func, &self.shared_flags)
    }

    // Binary emission is delegated to the `binemit` submodule.
    fn emit_inst(
        &self,
        func: &ir::Function,
        inst: ir::Inst,
        divert: &mut regalloc::RegDiversions,
        sink: &mut CodeSink,
    ) {
        binemit::emit_inst(func, inst, divert, sink)
    }

    fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
        emit_function(func, binemit::emit_inst, sink)
    }

    fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult {
        // Record this pass in the timing statistics.
        let _tt = timing::prologue_epilogue();
        abi::prologue_epilogue(func, self)
    }
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
63
lib/codegen/src/isa/x86/registers.rs
Normal file
63
lib/codegen/src/isa/x86/registers.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! x86 register descriptions.
|
||||
|
||||
use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use isa::RegUnit;
    use std::string::{String, ToString};

    // Register-name parsing must map to the hardware encoding numbers, not alphabetical order.
    #[test]
    fn unit_encodings() {
        // The encoding of integer registers is not alphabetical.
        assert_eq!(INFO.parse_regunit("rax"), Some(0));
        assert_eq!(INFO.parse_regunit("rbx"), Some(3));
        assert_eq!(INFO.parse_regunit("rcx"), Some(1));
        assert_eq!(INFO.parse_regunit("rdx"), Some(2));
        assert_eq!(INFO.parse_regunit("rsi"), Some(6));
        assert_eq!(INFO.parse_regunit("rdi"), Some(7));
        assert_eq!(INFO.parse_regunit("rbp"), Some(5));
        assert_eq!(INFO.parse_regunit("rsp"), Some(4));
        assert_eq!(INFO.parse_regunit("r8"), Some(8));
        assert_eq!(INFO.parse_regunit("r15"), Some(15));

        // FP registers follow the 16 GPRs.
        assert_eq!(INFO.parse_regunit("xmm0"), Some(16));
        assert_eq!(INFO.parse_regunit("xmm15"), Some(31));
    }

    // Display must be the inverse of parsing, with a `%` prefix.
    #[test]
    fn unit_names() {
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%rax");
        assert_eq!(uname(3), "%rbx");
        assert_eq!(uname(1), "%rcx");
        assert_eq!(uname(2), "%rdx");
        assert_eq!(uname(6), "%rsi");
        assert_eq!(uname(7), "%rdi");
        assert_eq!(uname(5), "%rbp");
        assert_eq!(uname(4), "%rsp");
        assert_eq!(uname(8), "%r8");
        assert_eq!(uname(15), "%r15");
        assert_eq!(uname(16), "%xmm0");
        assert_eq!(uname(31), "%xmm15");
    }

    // ABCD is a subclass of GPR; FPR is disjoint from both.
    #[test]
    fn regclasses() {
        assert_eq!(GPR.intersect_index(GPR), Some(GPR.into()));
        assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
        assert_eq!(GPR.intersect_index(FPR), None);
        assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
        assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into()));
        assert_eq!(ABCD.intersect_index(FPR), None);
        assert_eq!(FPR.intersect_index(FPR), Some(FPR.into()));
        assert_eq!(FPR.intersect_index(GPR), None);
        assert_eq!(FPR.intersect_index(ABCD), None);
    }
}
|
||||
52
lib/codegen/src/isa/x86/settings.rs
Normal file
52
lib/codegen/src/isa/x86/settings.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
//! x86 Settings.
|
||||
|
||||
use settings::{self, detail, Builder};
|
||||
use std::fmt;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `lib/codegen/meta/isa/x86/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{builder, Flags};
    use settings::{self, Configurable};

    // Preset CPU names must expand to the expected feature flags.
    #[test]
    fn presets() {
        let shared = settings::Flags::new(&settings::builder());

        // Nehalem has SSE4.1 but not BMI1.
        let mut b1 = builder();
        b1.enable("nehalem").unwrap();
        let f1 = Flags::new(&shared, &b1);
        assert_eq!(f1.has_sse41(), true);
        assert_eq!(f1.has_bmi1(), false);

        // Haswell has both SSE4.1 and BMI1.
        let mut b2 = builder();
        b2.enable("haswell").unwrap();
        let f2 = Flags::new(&shared, &b2);
        assert_eq!(f2.has_sse41(), true);
        assert_eq!(f2.has_bmi1(), true);
    }

    #[test]
    fn display_presets() {
        // Spot check that the flags Display impl does not cause a panic
        let shared = settings::Flags::new(&settings::builder());

        let b1 = builder();
        let f1 = Flags::new(&shared, &b1);
        let _ = format!("{}", f1);

        let mut b2 = builder();
        b2.enable("nehalem").unwrap();
        // Bug fix: previously built from `&b1`, so the nehalem flags were never displayed.
        let f2 = Flags::new(&shared, &b2);
        let _ = format!("{}", f2);

        let mut b3 = builder();
        b3.enable("haswell").unwrap();
        // Bug fix: previously built from `&b1`, so the haswell flags were never displayed.
        let f3 = Flags::new(&shared, &b3);
        let _ = format!("{}", f3);
    }
}
|
||||
98
lib/codegen/src/iterators.rs
Normal file
98
lib/codegen/src/iterators.rs
Normal file
@@ -0,0 +1,98 @@
|
||||
//! Iterator utilities.
|
||||
|
||||
/// Extra methods for iterators.
pub trait IteratorExtras: Iterator {
    /// Create an iterator that produces adjacent pairs of elements from the iterator.
    ///
    /// An iterator yielding `a, b, c` produces `(a, b), (b, c)`; fewer than two elements
    /// produce nothing.
    fn adjacent_pairs(mut self) -> AdjacentPairs<Self>
    where
        Self: Sized,
        Self::Item: Clone,
    {
        // Prime the pump so `next()` always holds the left element of the upcoming pair.
        let first = self.next();
        AdjacentPairs {
            elem: first,
            iter: self,
        }
    }
}

impl<T> IteratorExtras for T
where
    T: Iterator,
{
}

/// Adjacent pairs iterator returned by `adjacent_pairs()`.
///
/// This wraps another iterator and produces a sequence of adjacent pairs of elements.
pub struct AdjacentPairs<I>
where
    I: Iterator,
    I::Item: Clone,
{
    iter: I,
    elem: Option<I::Item>,
}

impl<I> Iterator for AdjacentPairs<I>
where
    I: Iterator,
    I::Item: Clone,
{
    type Item = (I::Item, I::Item);

    fn next(&mut self) -> Option<Self::Item> {
        // `elem` is the left half of the pair; the wrapped iterator supplies the right half,
        // which is retained as the left half of the following pair.
        match self.elem.take() {
            None => None,
            Some(prev) => {
                self.elem = self.iter.next();
                match self.elem {
                    Some(ref cur) => Some((prev, cur.clone())),
                    None => None,
                }
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::vec::Vec;

    // Exercise `adjacent_pairs()` at every interesting length: many, two, one, and zero
    // elements. (A verbatim duplicate of the `[2, 3, 4]` case has been removed.)
    #[test]
    fn adjpairs() {
        use super::IteratorExtras;

        assert_eq!(
            [1, 2, 3, 4]
                .iter()
                .cloned()
                .adjacent_pairs()
                .collect::<Vec<_>>(),
            vec![(1, 2), (2, 3), (3, 4)]
        );
        assert_eq!(
            [2, 3, 4]
                .iter()
                .cloned()
                .adjacent_pairs()
                .collect::<Vec<_>>(),
            vec![(2, 3), (3, 4)]
        );
        assert_eq!(
            [3, 4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
            vec![(3, 4)]
        );
        assert_eq!(
            [4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
            vec![]
        );
        assert_eq!(
            []
                .iter()
                .cloned()
                .adjacent_pairs()
                .collect::<Vec<(i32, i32)>>(),
            vec![]
        );
    }
}
|
||||
683
lib/codegen/src/legalizer/boundary.rs
Normal file
683
lib/codegen/src/legalizer/boundary.rs
Normal file
@@ -0,0 +1,683 @@
|
||||
//! Legalize ABI boundaries.
|
||||
//!
|
||||
//! This legalizer sub-module contains code for dealing with ABI boundaries:
|
||||
//!
|
||||
//! - Function arguments passed to the entry block.
|
||||
//! - Function arguments passed to call instructions.
|
||||
//! - Return values from call instructions.
|
||||
//! - Return values passed to return instructions.
|
||||
//!
|
||||
//! The ABI boundary legalization happens in two phases:
|
||||
//!
|
||||
//! 1. The `legalize_signatures` function rewrites all the preamble signatures with ABI information
|
||||
//! and possibly new argument types. It also rewrites the entry block arguments to match.
|
||||
//! 2. The `handle_call_abi` and `handle_return_abi` functions rewrite call and return instructions
|
||||
//! to match the new ABI signatures.
|
||||
//!
|
||||
//! Between the two phases, preamble signatures and call/return arguments don't match. This
|
||||
//! intermediate state doesn't type check.
|
||||
|
||||
use abi::{legalize_abi_value, ValueConversion};
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::instructions::CallInfo;
|
||||
use ir::{AbiParam, ArgumentLoc, ArgumentPurpose, DataFlowGraph, Ebb, Function, Inst, InstBuilder,
|
||||
SigRef, Signature, Type, Value, ValueLoc};
|
||||
use isa::TargetIsa;
|
||||
use legalizer::split::{isplit, vsplit};
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Legalize all the function signatures in `func`.
///
/// This changes all signatures to be ABI-compliant with full `ArgumentLoc` annotations. It doesn't
/// change the entry block arguments, calls, or return instructions, so this can leave the function
/// in a state with type discrepancies.
pub fn legalize_signatures(func: &mut Function, isa: &TargetIsa) {
    // Legalize the function's own signature (`current == true`)...
    isa.legalize_signature(&mut func.signature, true);
    func.signature.compute_argument_bytes();
    // ...and every callee signature in the preamble (`current == false`).
    for sig_data in func.dfg.signatures.values_mut() {
        isa.legalize_signature(sig_data, false);
        sig_data.compute_argument_bytes();
    }

    // Bring the entry block parameters in line with the legalized signature.
    // (`spill_entry_params` is defined elsewhere in this module.)
    if let Some(entry) = func.layout.entry_block() {
        legalize_entry_params(func, entry);
        spill_entry_params(func, entry);
    }
}
|
||||
|
||||
/// Legalize the entry block parameters after `func`'s signature has been legalized.
///
/// The legalized signature may contain more parameters than the original signature, and the
/// parameter types have been changed. This function goes through the parameters of the entry EBB
/// and replaces them with parameters of the right type for the ABI.
///
/// The original entry EBB parameters are computed from the new ABI parameters by code inserted at
/// the top of the entry block.
fn legalize_entry_params(func: &mut Function, entry: Ebb) {
    // Flags tracking which special-purpose parameters have been seen, used to catch duplicates.
    let mut has_sret = false;
    let mut has_link = false;
    let mut has_vmctx = false;
    let mut has_sigid = false;

    // Insert position for argument conversion code.
    // We want to insert instructions before the first instruction in the entry block.
    // If the entry block is empty, append instructions to it instead.
    let mut pos = FuncCursor::new(func).at_first_inst(entry);

    // Keep track of the argument types in the ABI-legalized signature.
    let mut abi_arg = 0;

    // Process the EBB parameters one at a time, possibly replacing one argument with multiple new
    // ones. We do this by detaching the entry EBB parameters first.
    let ebb_params = pos.func.dfg.detach_ebb_params(entry);
    let mut old_arg = 0;
    while let Some(arg) = ebb_params.get(old_arg, &pos.func.dfg.value_lists) {
        old_arg += 1;

        let abi_type = pos.func.signature.params[abi_arg];
        let arg_type = pos.func.dfg.value_type(arg);
        if arg_type == abi_type.value_type {
            // No value translation is necessary, this argument matches the ABI type.
            // Just use the original EBB argument value. This is the most common case.
            pos.func.dfg.attach_ebb_param(entry, arg);
            match abi_type.purpose {
                ArgumentPurpose::Normal => {}
                ArgumentPurpose::FramePointer => {}
                ArgumentPurpose::CalleeSaved => {}
                ArgumentPurpose::StructReturn => {
                    debug_assert!(!has_sret, "Multiple sret arguments found");
                    has_sret = true;
                }
                ArgumentPurpose::VMContext => {
                    debug_assert!(!has_vmctx, "Multiple vmctx arguments found");
                    has_vmctx = true;
                }
                ArgumentPurpose::SignatureId => {
                    debug_assert!(!has_sigid, "Multiple sigid arguments found");
                    has_sigid = true;
                }
                _ => panic!("Unexpected special-purpose arg {}", abi_type),
            }
            abi_arg += 1;
        } else {
            // Compute the value we want for `arg` from the legalized ABI parameters.
            // The closure hands `convert_from_abi` one ABI parameter at a time, appending each
            // to the entry EBB and advancing `abi_arg`.
            let mut get_arg = |func: &mut Function, ty| {
                let abi_type = func.signature.params[abi_arg];
                debug_assert_eq!(
                    abi_type.purpose,
                    ArgumentPurpose::Normal,
                    "Can't legalize special-purpose argument"
                );
                if ty == abi_type.value_type {
                    abi_arg += 1;
                    Ok(func.dfg.append_ebb_param(entry, ty))
                } else {
                    Err(abi_type)
                }
            };
            let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg);
            // The old `arg` is no longer an attached EBB argument, but there are probably still
            // uses of the value.
            debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted);
        }
    }

    // The legalized signature may contain additional parameters representing special-purpose
    // registers.
    for &arg in &pos.func.signature.params[abi_arg..] {
        match arg.purpose {
            // Any normal parameters should have been processed above.
            ArgumentPurpose::Normal => {
                panic!("Leftover arg: {}", arg);
            }
            // The callee-save parameters should not appear until after register allocation is
            // done.
            ArgumentPurpose::FramePointer |
            ArgumentPurpose::CalleeSaved => {
                panic!("Premature callee-saved arg {}", arg);
            }
            // These can be meaningfully added by `legalize_signature()`.
            ArgumentPurpose::Link => {
                debug_assert!(!has_link, "Multiple link parameters found");
                has_link = true;
            }
            ArgumentPurpose::StructReturn => {
                debug_assert!(!has_sret, "Multiple sret parameters found");
                has_sret = true;
            }
            ArgumentPurpose::VMContext => {
                debug_assert!(!has_vmctx, "Multiple vmctx parameters found");
                has_vmctx = true;
            }
            ArgumentPurpose::SignatureId => {
                debug_assert!(!has_sigid, "Multiple sigid parameters found");
                has_sigid = true;
            }
        }

        // Just create entry block values to match here. We will use them in `handle_return_abi()`
        // below.
        pos.func.dfg.append_ebb_param(entry, arg.value_type);
    }
}
|
||||
|
||||
/// Legalize the results returned from a call instruction to match the ABI signature.
///
/// The cursor `pos` points to a call instruction with at least one return value. The cursor will
/// be left pointing after the instructions inserted to convert the return values.
///
/// This function is very similar to the `legalize_entry_params` function above.
///
/// Returns the possibly new instruction representing the call.
fn legalize_inst_results<ResType>(pos: &mut FuncCursor, mut get_abi_type: ResType) -> Inst
where
    ResType: FnMut(&Function, usize) -> AbiParam,
{
    let call = pos.current_inst().expect(
        "Cursor must point to a call instruction",
    );

    // We theoretically allow for call instructions that return a number of fixed results before
    // the call return values. In practice, it doesn't happen.
    let fixed_results = pos.func.dfg[call].opcode().constraints().fixed_results();
    debug_assert_eq!(fixed_results, 0, "Fixed results on calls not supported");

    // Detach the original results so they can be reattached or rebuilt one at a time.
    let results = pos.func.dfg.detach_results(call);
    let mut next_res = 0;
    let mut abi_res = 0;

    // Point immediately after the call.
    pos.next_inst();

    while let Some(res) = results.get(next_res, &pos.func.dfg.value_lists) {
        next_res += 1;

        let res_type = pos.func.dfg.value_type(res);
        if res_type == get_abi_type(pos.func, abi_res).value_type {
            // No value translation is necessary, this result matches the ABI type.
            pos.func.dfg.attach_result(call, res);
            abi_res += 1;
        } else {
            // Rebuild `res` from one or more ABI results appended to the call; the closure
            // hands `convert_from_abi` one ABI result at a time.
            let mut get_res = |func: &mut Function, ty| {
                let abi_type = get_abi_type(func, abi_res);
                if ty == abi_type.value_type {
                    let last_res = func.dfg.append_result(call, ty);
                    abi_res += 1;
                    Ok(last_res)
                } else {
                    Err(abi_type)
                }
            };
            let v = convert_from_abi(pos, res_type, Some(res), &mut get_res);
            debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v);
        }
    }

    call
}
|
||||
|
||||
/// Compute original value of type `ty` from the legalized ABI arguments.
///
/// The conversion is recursive, controlled by the `get_arg` closure which is called to retrieve an
/// ABI argument. It returns:
///
/// - `Ok(arg)` if the requested type matches the next ABI argument.
/// - `Err(arg_type)` if further conversions are needed from the ABI argument `arg_type`.
///
/// If the `into_result` value is provided, the converted result will be written into that value.
fn convert_from_abi<GetArg>(
    pos: &mut FuncCursor,
    ty: Type,
    into_result: Option<Value>,
    get_arg: &mut GetArg,
) -> Value
where
    GetArg: FnMut(&mut Function, Type) -> Result<Value, AbiParam>,
{
    // Terminate the recursion when we get the desired type.
    let arg_type = match get_arg(pos.func, ty) {
        Ok(v) => {
            debug_assert_eq!(pos.func.dfg.value_type(v), ty);
            debug_assert_eq!(into_result, None);
            return v;
        }
        Err(t) => t,
    };

    // Reconstruct how `ty` was legalized into the `arg_type` argument.
    let conversion = legalize_abi_value(ty, &arg_type);

    dbg!("convert_from_abi({}): {:?}", ty, conversion);

    // The conversion describes value to ABI argument. We implement the reverse conversion here.
    match conversion {
        // Construct a `ty` by concatenating two ABI integers.
        ValueConversion::IntSplit => {
            let abi_ty = ty.half_width().expect("Invalid type for conversion");
            let lo = convert_from_abi(pos, abi_ty, None, get_arg);
            let hi = convert_from_abi(pos, abi_ty, None, get_arg);
            dbg!(
                "intsplit {}: {}, {}: {}",
                lo,
                pos.func.dfg.value_type(lo),
                hi,
                pos.func.dfg.value_type(hi)
            );
            pos.ins().with_results([into_result]).iconcat(lo, hi)
        }
        // Construct a `ty` by concatenating two halves of a vector.
        ValueConversion::VectorSplit => {
            let abi_ty = ty.half_vector().expect("Invalid type for conversion");
            let lo = convert_from_abi(pos, abi_ty, None, get_arg);
            let hi = convert_from_abi(pos, abi_ty, None, get_arg);
            pos.ins().with_results([into_result]).vconcat(lo, hi)
        }
        // Construct a `ty` by bit-casting from an integer type.
        ValueConversion::IntBits => {
            debug_assert!(!ty.is_int());
            let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
            let arg = convert_from_abi(pos, abi_ty, None, get_arg);
            pos.ins().with_results([into_result]).bitcast(ty, arg)
        }
        // ABI argument is a sign-extended version of the value we want.
        ValueConversion::Sext(abi_ty) => {
            let arg = convert_from_abi(pos, abi_ty, None, get_arg);
            // TODO: Currently, we don't take advantage of the ABI argument being sign-extended.
            // We could insert an `assert_sreduce` which would fold with a following `sextend` of
            // this value.
            pos.ins().with_results([into_result]).ireduce(ty, arg)
        }
        // ABI argument is a zero-extended version of the value we want.
        ValueConversion::Uext(abi_ty) => {
            let arg = convert_from_abi(pos, abi_ty, None, get_arg);
            // TODO: Currently, we don't take advantage of the ABI argument being zero-extended.
            // We could insert an `assert_ureduce` which would fold with a following `uextend` of
            // this value.
            pos.ins().with_results([into_result]).ireduce(ty, arg)
        }
    }
}
|
||||
|
||||
/// Convert `value` to match an ABI signature by inserting instructions at `pos`.
|
||||
///
|
||||
/// This may require expanding the value to multiple ABI arguments. The conversion process is
|
||||
/// recursive and controlled by the `put_arg` closure. When a candidate argument value is presented
|
||||
/// to the closure, it will perform one of two actions:
|
||||
///
|
||||
/// 1. If the suggested argument has an acceptable value type, consume it by adding it to the list
|
||||
/// of arguments and return `Ok(())`.
|
||||
/// 2. If the suggested argument doesn't have the right value type, don't change anything, but
|
||||
/// return the `Err(AbiParam)` that is needed.
|
||||
///
|
||||
fn convert_to_abi<PutArg>(
|
||||
pos: &mut FuncCursor,
|
||||
cfg: &ControlFlowGraph,
|
||||
value: Value,
|
||||
put_arg: &mut PutArg,
|
||||
) where
|
||||
PutArg: FnMut(&mut Function, Value) -> Result<(), AbiParam>,
|
||||
{
|
||||
// Start by invoking the closure to either terminate the recursion or get the argument type
|
||||
// we're trying to match.
|
||||
let arg_type = match put_arg(pos.func, value) {
|
||||
Ok(_) => return,
|
||||
Err(t) => t,
|
||||
};
|
||||
|
||||
let ty = pos.func.dfg.value_type(value);
|
||||
match legalize_abi_value(ty, &arg_type) {
|
||||
ValueConversion::IntSplit => {
|
||||
let curpos = pos.position();
|
||||
let srcloc = pos.srcloc();
|
||||
let (lo, hi) = isplit(&mut pos.func, cfg, curpos, srcloc, value);
|
||||
convert_to_abi(pos, cfg, lo, put_arg);
|
||||
convert_to_abi(pos, cfg, hi, put_arg);
|
||||
}
|
||||
ValueConversion::VectorSplit => {
|
||||
let curpos = pos.position();
|
||||
let srcloc = pos.srcloc();
|
||||
let (lo, hi) = vsplit(&mut pos.func, cfg, curpos, srcloc, value);
|
||||
convert_to_abi(pos, cfg, lo, put_arg);
|
||||
convert_to_abi(pos, cfg, hi, put_arg);
|
||||
}
|
||||
ValueConversion::IntBits => {
|
||||
debug_assert!(!ty.is_int());
|
||||
let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
|
||||
let arg = pos.ins().bitcast(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
ValueConversion::Sext(abi_ty) => {
|
||||
let arg = pos.ins().sextend(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
ValueConversion::Uext(abi_ty) => {
|
||||
let arg = pos.ins().uextend(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a sequence of arguments match a desired sequence of argument types.
|
||||
fn check_arg_types(dfg: &DataFlowGraph, args: &[Value], types: &[AbiParam]) -> bool {
|
||||
let arg_types = args.iter().map(|&v| dfg.value_type(v));
|
||||
let sig_types = types.iter().map(|&at| at.value_type);
|
||||
arg_types.eq(sig_types)
|
||||
}
|
||||
|
||||
/// Check if the arguments of the call `inst` match the signature.
|
||||
///
|
||||
/// Returns `Ok(())` if the signature matches and no changes are needed, or `Err(sig_ref)` if the
|
||||
/// signature doesn't match.
|
||||
fn check_call_signature(dfg: &DataFlowGraph, inst: Inst) -> Result<(), SigRef> {
|
||||
// Extract the signature and argument values.
|
||||
let (sig_ref, args) = match dfg[inst].analyze_call(&dfg.value_lists) {
|
||||
CallInfo::Direct(func, args) => (dfg.ext_funcs[func].signature, args),
|
||||
CallInfo::Indirect(sig_ref, args) => (sig_ref, args),
|
||||
CallInfo::NotACall => panic!("Expected call, got {:?}", dfg[inst]),
|
||||
};
|
||||
let sig = &dfg.signatures[sig_ref];
|
||||
|
||||
if check_arg_types(dfg, args, &sig.params[..]) &&
|
||||
check_arg_types(dfg, dfg.inst_results(inst), &sig.returns[..])
|
||||
{
|
||||
// All types check out.
|
||||
Ok(())
|
||||
} else {
|
||||
// Call types need fixing.
|
||||
Err(sig_ref)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the arguments of the return `inst` match the signature.
|
||||
fn check_return_signature(dfg: &DataFlowGraph, inst: Inst, sig: &Signature) -> bool {
|
||||
check_arg_types(dfg, dfg.inst_variable_args(inst), &sig.returns)
|
||||
}
|
||||
|
||||
/// Insert ABI conversion code for the arguments to the call or return instruction at `pos`.
|
||||
///
|
||||
/// - `abi_args` is the number of arguments that the ABI signature requires.
|
||||
/// - `get_abi_type` is a closure that can provide the desired `AbiParam` for a given ABI
|
||||
/// argument number in `0..abi_args`.
|
||||
///
|
||||
fn legalize_inst_arguments<ArgType>(
|
||||
pos: &mut FuncCursor,
|
||||
cfg: &ControlFlowGraph,
|
||||
abi_args: usize,
|
||||
mut get_abi_type: ArgType,
|
||||
) where
|
||||
ArgType: FnMut(&Function, usize) -> AbiParam,
|
||||
{
|
||||
let inst = pos.current_inst().expect(
|
||||
"Cursor must point to a call instruction",
|
||||
);
|
||||
|
||||
// Lift the value list out of the call instruction so we modify it.
|
||||
let mut vlist = pos.func.dfg[inst].take_value_list().expect(
|
||||
"Call must have a value list",
|
||||
);
|
||||
|
||||
// The value list contains all arguments to the instruction, including the callee on an
|
||||
// indirect call which isn't part of the call arguments that must match the ABI signature.
|
||||
// Figure out how many fixed values are at the front of the list. We won't touch those.
|
||||
let fixed_values = pos.func.dfg[inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.fixed_value_arguments();
|
||||
let have_args = vlist.len(&pos.func.dfg.value_lists) - fixed_values;
|
||||
|
||||
// Grow the value list to the right size and shift all the existing arguments to the right.
|
||||
// This lets us write the new argument values into the list without overwriting the old
|
||||
// arguments.
|
||||
//
|
||||
// Before:
|
||||
//
|
||||
// <--> fixed_values
|
||||
// <-----------> have_args
|
||||
// [FFFFOOOOOOOOOOOOO]
|
||||
//
|
||||
// After grow_at():
|
||||
//
|
||||
// <--> fixed_values
|
||||
// <-----------> have_args
|
||||
// <------------------> abi_args
|
||||
// [FFFF-------OOOOOOOOOOOOO]
|
||||
// ^
|
||||
// old_arg_offset
|
||||
//
|
||||
// After writing the new arguments:
|
||||
//
|
||||
// <--> fixed_values
|
||||
// <------------------> abi_args
|
||||
// [FFFFNNNNNNNNNNNNNNNNNNNN]
|
||||
//
|
||||
vlist.grow_at(
|
||||
fixed_values,
|
||||
abi_args - have_args,
|
||||
&mut pos.func.dfg.value_lists,
|
||||
);
|
||||
let old_arg_offset = fixed_values + abi_args - have_args;
|
||||
|
||||
let mut abi_arg = 0;
|
||||
for old_arg in 0..have_args {
|
||||
let old_value = vlist
|
||||
.get(old_arg_offset + old_arg, &pos.func.dfg.value_lists)
|
||||
.unwrap();
|
||||
let mut put_arg = |func: &mut Function, arg| {
|
||||
let abi_type = get_abi_type(func, abi_arg);
|
||||
if func.dfg.value_type(arg) == abi_type.value_type {
|
||||
// This is the argument type we need.
|
||||
vlist.as_mut_slice(&mut func.dfg.value_lists)[fixed_values + abi_arg] = arg;
|
||||
abi_arg += 1;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(abi_type)
|
||||
}
|
||||
};
|
||||
convert_to_abi(pos, cfg, old_value, &mut put_arg);
|
||||
}
|
||||
|
||||
// Put the modified value list back.
|
||||
pos.func.dfg[inst].put_value_list(vlist);
|
||||
}
|
||||
|
||||
/// Insert ABI conversion code before and after the call instruction at `pos`.
|
||||
///
|
||||
/// Instructions inserted before the call will compute the appropriate ABI values for the
|
||||
/// callee's new ABI-legalized signature. The function call arguments are rewritten in place to
|
||||
/// match the new signature.
|
||||
///
|
||||
/// Instructions will be inserted after the call to convert returned ABI values back to the
|
||||
/// original return values. The call's result values will be adapted to match the new signature.
|
||||
///
|
||||
/// Returns `true` if any instructions were inserted.
|
||||
pub fn handle_call_abi(mut inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool {
|
||||
let pos = &mut FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// Start by checking if the argument types already match the signature.
|
||||
let sig_ref = match check_call_signature(&pos.func.dfg, inst) {
|
||||
Ok(_) => return spill_call_arguments(pos),
|
||||
Err(s) => s,
|
||||
};
|
||||
|
||||
// OK, we need to fix the call arguments to match the ABI signature.
|
||||
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.dfg.signatures[sig_ref].params[abi_arg]
|
||||
});
|
||||
|
||||
if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
|
||||
inst = legalize_inst_results(pos, |func, abi_res| {
|
||||
func.dfg.signatures[sig_ref].returns[abi_res]
|
||||
});
|
||||
}
|
||||
|
||||
debug_assert!(
|
||||
check_call_signature(&pos.func.dfg, inst).is_ok(),
|
||||
"Signature still wrong: {}, {}{}",
|
||||
pos.func.dfg.display_inst(inst, None),
|
||||
sig_ref,
|
||||
pos.func.dfg.signatures[sig_ref]
|
||||
);
|
||||
|
||||
// Go back and insert spills for any stack arguments.
|
||||
pos.goto_inst(inst);
|
||||
spill_call_arguments(pos);
|
||||
|
||||
// Yes, we changed stuff.
|
||||
true
|
||||
}
|
||||
|
||||
/// Insert ABI conversion code before and after the return instruction at `inst`.
|
||||
///
|
||||
/// Return `true` if any instructions were inserted.
|
||||
pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool {
|
||||
// Check if the returned types already match the signature.
|
||||
if check_return_signature(&func.dfg, inst, &func.signature) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Count the special-purpose return values (`link`, `sret`, and `vmctx`) that were appended to
|
||||
// the legalized signature.
|
||||
let special_args = func.signature
|
||||
.returns
|
||||
.iter()
|
||||
.rev()
|
||||
.take_while(|&rt| {
|
||||
rt.purpose == ArgumentPurpose::Link || rt.purpose == ArgumentPurpose::StructReturn ||
|
||||
rt.purpose == ArgumentPurpose::VMContext
|
||||
})
|
||||
.count();
|
||||
let abi_args = func.signature.returns.len() - special_args;
|
||||
|
||||
let pos = &mut FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.signature.returns[abi_arg]
|
||||
});
|
||||
debug_assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args);
|
||||
|
||||
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
|
||||
// the legalized signature. These values should simply be propagated from the entry block
|
||||
// arguments.
|
||||
if special_args > 0 {
|
||||
dbg!(
|
||||
"Adding {} special-purpose arguments to {}",
|
||||
special_args,
|
||||
pos.func.dfg.display_inst(inst, None)
|
||||
);
|
||||
let mut vlist = pos.func.dfg[inst].take_value_list().unwrap();
|
||||
for arg in &pos.func.signature.returns[abi_args..] {
|
||||
match arg.purpose {
|
||||
ArgumentPurpose::Link |
|
||||
ArgumentPurpose::StructReturn |
|
||||
ArgumentPurpose::VMContext => {}
|
||||
ArgumentPurpose::Normal => panic!("unexpected return value {}", arg),
|
||||
_ => panic!("Unsupported special purpose return value {}", arg),
|
||||
}
|
||||
// A `link`/`sret`/`vmctx` return value can only appear in a signature that has a
|
||||
// unique matching argument. They are appended at the end, so search the signature from
|
||||
// the end.
|
||||
let idx = pos.func
|
||||
.signature
|
||||
.params
|
||||
.iter()
|
||||
.rposition(|t| t.purpose == arg.purpose)
|
||||
.expect("No matching special purpose argument.");
|
||||
// Get the corresponding entry block value and add it to the return instruction's
|
||||
// arguments.
|
||||
let val = pos.func.dfg.ebb_params(
|
||||
pos.func.layout.entry_block().unwrap(),
|
||||
)
|
||||
[idx];
|
||||
debug_assert_eq!(pos.func.dfg.value_type(val), arg.value_type);
|
||||
vlist.push(val, &mut pos.func.dfg.value_lists);
|
||||
}
|
||||
pos.func.dfg[inst].put_value_list(vlist);
|
||||
}
|
||||
|
||||
debug_assert!(
|
||||
check_return_signature(&pos.func.dfg, inst, &pos.func.signature),
|
||||
"Signature still wrong: {} / signature {}",
|
||||
pos.func.dfg.display_inst(inst, None),
|
||||
pos.func.signature
|
||||
);
|
||||
|
||||
// Yes, we changed stuff.
|
||||
true
|
||||
}
|
||||
|
||||
/// Assign stack slots to incoming function parameters on the stack.
|
||||
///
|
||||
/// Values that are passed into the function on the stack must be assigned to an `IncomingArg`
|
||||
/// stack slot already during legalization.
|
||||
fn spill_entry_params(func: &mut Function, entry: Ebb) {
|
||||
for (abi, &arg) in func.signature.params.iter().zip(func.dfg.ebb_params(entry)) {
|
||||
if let ArgumentLoc::Stack(offset) = abi.location {
|
||||
let ss = func.stack_slots.make_incoming_arg(abi.value_type, offset);
|
||||
func.locations[arg] = ValueLoc::Stack(ss);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Assign stack slots to outgoing function arguments on the stack.
|
||||
///
|
||||
/// Values that are passed to a called function on the stack must be assigned to a matching
|
||||
/// `OutgoingArg` stack slot. The assignment must happen immediately before the call.
|
||||
///
|
||||
/// TODO: The outgoing stack slots can be written a bit earlier, as long as there are no branches
|
||||
/// or calls between writing the stack slots and the call instruction. Writing the slots earlier
|
||||
/// could help reduce register pressure before the call.
|
||||
fn spill_call_arguments(pos: &mut FuncCursor) -> bool {
|
||||
let inst = pos.current_inst().expect(
|
||||
"Cursor must point to a call instruction",
|
||||
);
|
||||
let sig_ref = pos.func.dfg.call_signature(inst).expect(
|
||||
"Call instruction expected.",
|
||||
);
|
||||
|
||||
// Start by building a list of stack slots and arguments to be replaced.
|
||||
// This requires borrowing `pos.func.dfg`, so we can't change anything.
|
||||
let arglist = {
|
||||
let locations = &pos.func.locations;
|
||||
let stack_slots = &mut pos.func.stack_slots;
|
||||
pos.func
|
||||
.dfg
|
||||
.inst_variable_args(inst)
|
||||
.iter()
|
||||
.zip(&pos.func.dfg.signatures[sig_ref].params)
|
||||
.enumerate()
|
||||
.filter_map(|(idx, (&arg, abi))| {
|
||||
match abi.location {
|
||||
ArgumentLoc::Stack(offset) => {
|
||||
// Assign `arg` to a new stack slot, unless it's already in the correct
|
||||
// slot. The legalization needs to be idempotent, so we should see a
|
||||
// correct outgoing slot on the second pass.
|
||||
let ss = stack_slots.get_outgoing_arg(abi.value_type, offset);
|
||||
if locations[arg] != ValueLoc::Stack(ss) {
|
||||
Some((idx, arg, ss))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
if arglist.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Insert the spill instructions and rewrite call arguments.
|
||||
for (idx, arg, ss) in arglist {
|
||||
let stack_val = pos.ins().spill(arg);
|
||||
pos.func.locations[stack_val] = ValueLoc::Stack(ss);
|
||||
pos.func.dfg.inst_variable_args_mut(inst)[idx] = stack_val;
|
||||
}
|
||||
|
||||
// We changed stuff.
|
||||
true
|
||||
}
|
||||
60
lib/codegen/src/legalizer/call.rs
Normal file
60
lib/codegen/src/legalizer/call.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
//! Legalization of calls.
|
||||
//!
|
||||
//! This module exports the `expand_call` function which transforms a `call`
|
||||
//! instruction into `func_addr` and `call_indirect` instructions.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, InstBuilder};
|
||||
use isa::TargetIsa;
|
||||
|
||||
/// Expand a `call` instruction.
|
||||
pub fn expand_call(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
isa: &TargetIsa,
|
||||
) {
|
||||
// Unpack the instruction.
|
||||
let (func_ref, old_args) = match func.dfg[inst] {
|
||||
ir::InstructionData::Call {
|
||||
opcode,
|
||||
ref args,
|
||||
func_ref,
|
||||
} => {
|
||||
debug_assert_eq!(opcode, ir::Opcode::Call);
|
||||
(func_ref, args.clone())
|
||||
}
|
||||
_ => panic!("Wanted call: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
|
||||
let ptr_ty = if isa.flags().is_64bit() {
|
||||
ir::types::I64
|
||||
} else {
|
||||
ir::types::I32
|
||||
};
|
||||
|
||||
let sig = func.dfg.ext_funcs[func_ref].signature;
|
||||
|
||||
let callee = {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
pos.ins().func_addr(ptr_ty, func_ref)
|
||||
};
|
||||
|
||||
let mut new_args = ir::ValueList::default();
|
||||
new_args.push(callee, &mut func.dfg.value_lists);
|
||||
for i in 0..old_args.len(&func.dfg.value_lists) {
|
||||
new_args.push(
|
||||
old_args.as_slice(&func.dfg.value_lists)[i],
|
||||
&mut func.dfg.value_lists,
|
||||
);
|
||||
}
|
||||
|
||||
func.dfg.replace(inst).CallIndirect(
|
||||
ir::Opcode::CallIndirect,
|
||||
ptr_ty,
|
||||
sig,
|
||||
new_args,
|
||||
);
|
||||
}
|
||||
67
lib/codegen/src/legalizer/globalvar.rs
Normal file
67
lib/codegen/src/legalizer/globalvar.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
//! Legalization of global variables.
|
||||
//!
|
||||
//! This module exports the `expand_global_addr` function which transforms a `global_addr`
|
||||
//! instruction into code that depends on the kind of global variable referenced.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, InstBuilder};
|
||||
use isa::TargetIsa;
|
||||
|
||||
/// Expand a `global_addr` instruction according to the definition of the global variable.
|
||||
pub fn expand_global_addr(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
_isa: &TargetIsa,
|
||||
) {
|
||||
// Unpack the instruction.
|
||||
let gv = match func.dfg[inst] {
|
||||
ir::InstructionData::UnaryGlobalVar { opcode, global_var } => {
|
||||
debug_assert_eq!(opcode, ir::Opcode::GlobalAddr);
|
||||
global_var
|
||||
}
|
||||
_ => panic!("Wanted global_addr: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
|
||||
match func.global_vars[gv] {
|
||||
ir::GlobalVarData::VMContext { offset } => vmctx_addr(inst, func, offset.into()),
|
||||
ir::GlobalVarData::Deref { base, offset } => deref_addr(inst, func, base, offset.into()),
|
||||
ir::GlobalVarData::Sym { .. } => globalsym(inst, func, gv),
|
||||
}
|
||||
}
|
||||
|
||||
/// Expand a `global_addr` instruction for a vmctx global.
|
||||
fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function, offset: i64) {
|
||||
// Get the value representing the `vmctx` argument.
|
||||
let vmctx = func.special_param(ir::ArgumentPurpose::VMContext).expect(
|
||||
"Missing vmctx parameter",
|
||||
);
|
||||
|
||||
// Simply replace the `global_addr` instruction with an `iadd_imm`, reusing the result value.
|
||||
func.dfg.replace(inst).iadd_imm(vmctx, offset);
|
||||
}
|
||||
|
||||
/// Expand a `global_addr` instruction for a deref global.
|
||||
fn deref_addr(inst: ir::Inst, func: &mut ir::Function, base: ir::GlobalVar, offset: i64) {
|
||||
// We need to load a pointer from the `base` global variable, so insert a new `global_addr`
|
||||
// instruction. This depends on the iterative legalization loop. Note that the IR verifier
|
||||
// detects any cycles in the `deref` globals.
|
||||
let ptr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
let base_addr = pos.ins().global_addr(ptr_ty, base);
|
||||
let mut mflags = ir::MemFlags::new();
|
||||
// Deref globals are required to be accessible and aligned.
|
||||
mflags.set_notrap();
|
||||
mflags.set_aligned();
|
||||
let base_ptr = pos.ins().load(ptr_ty, mflags, base_addr, 0);
|
||||
pos.func.dfg.replace(inst).iadd_imm(base_ptr, offset);
|
||||
}
|
||||
|
||||
/// Expand a `global_addr` instruction for a symbolic name global.
|
||||
fn globalsym(inst: ir::Inst, func: &mut ir::Function, gv: ir::GlobalVar) {
|
||||
let ptr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
func.dfg.replace(inst).globalsym_addr(ptr_ty, gv);
|
||||
}
|
||||
190
lib/codegen/src/legalizer/heap.rs
Normal file
190
lib/codegen/src/legalizer/heap.rs
Normal file
@@ -0,0 +1,190 @@
|
||||
//! Legalization of heaps.
|
||||
//!
|
||||
//! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
|
||||
//! instruction into code that depends on the kind of heap referenced.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::condcodes::IntCC;
|
||||
use ir::{self, InstBuilder, MemFlags};
|
||||
use isa::TargetIsa;
|
||||
|
||||
/// Expand a `heap_addr` instruction according to the definition of the heap.
|
||||
pub fn expand_heap_addr(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
_isa: &TargetIsa,
|
||||
) {
|
||||
// Unpack the instruction.
|
||||
let (heap, offset, size) = match func.dfg[inst] {
|
||||
ir::InstructionData::HeapAddr {
|
||||
opcode,
|
||||
heap,
|
||||
arg,
|
||||
imm,
|
||||
} => {
|
||||
debug_assert_eq!(opcode, ir::Opcode::HeapAddr);
|
||||
(heap, arg, imm.into())
|
||||
}
|
||||
_ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
|
||||
match func.heaps[heap].style {
|
||||
ir::HeapStyle::Dynamic { bound_gv } => {
|
||||
dynamic_addr(inst, heap, offset, size, bound_gv, func)
|
||||
}
|
||||
ir::HeapStyle::Static { bound } => {
|
||||
static_addr(inst, heap, offset, size, bound.into(), func, cfg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Expand a `heap_addr` for a dynamic heap.
|
||||
fn dynamic_addr(
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
offset: ir::Value,
|
||||
size: u32,
|
||||
bound_gv: ir::GlobalVar,
|
||||
func: &mut ir::Function,
|
||||
) {
|
||||
let size = i64::from(size);
|
||||
let offset_ty = func.dfg.value_type(offset);
|
||||
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let min_size = func.heaps[heap].min_size.into();
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// Start with the bounds check. Trap if `offset + size > bound`.
|
||||
let bound_addr = pos.ins().global_addr(addr_ty, bound_gv);
|
||||
let mut mflags = MemFlags::new();
|
||||
// The bound variable is requied to be accessible and aligned.
|
||||
mflags.set_notrap();
|
||||
mflags.set_aligned();
|
||||
let bound = pos.ins().load(offset_ty, mflags, bound_addr, 0);
|
||||
|
||||
let oob;
|
||||
if size == 1 {
|
||||
// `offset > bound - 1` is the same as `offset >= bound`.
|
||||
oob = pos.ins().icmp(
|
||||
IntCC::UnsignedGreaterThanOrEqual,
|
||||
offset,
|
||||
bound,
|
||||
);
|
||||
} else if size <= min_size {
|
||||
// We know that bound >= min_size, so here we can compare `offset > bound - size` without
|
||||
// wrapping.
|
||||
let adj_bound = pos.ins().iadd_imm(bound, -size);
|
||||
oob = pos.ins().icmp(
|
||||
IntCC::UnsignedGreaterThan,
|
||||
offset,
|
||||
adj_bound,
|
||||
);
|
||||
} else {
|
||||
// We need an overflow check for the adjusted offset.
|
||||
let size_val = pos.ins().iconst(offset_ty, size);
|
||||
let (adj_offset, overflow) = pos.ins().iadd_cout(offset, size_val);
|
||||
pos.ins().trapnz(overflow, ir::TrapCode::HeapOutOfBounds);
|
||||
oob = pos.ins().icmp(
|
||||
IntCC::UnsignedGreaterThan,
|
||||
adj_offset,
|
||||
bound,
|
||||
);
|
||||
}
|
||||
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
|
||||
|
||||
offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
|
||||
}
|
||||
|
||||
/// Expand a `heap_addr` for a static heap.
|
||||
fn static_addr(
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
offset: ir::Value,
|
||||
size: u32,
|
||||
bound: i64,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
) {
|
||||
let size = i64::from(size);
|
||||
let offset_ty = func.dfg.value_type(offset);
|
||||
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// Start with the bounds check. Trap if `offset + size > bound`.
|
||||
if size > bound {
|
||||
// This will simply always trap since `offset >= 0`.
|
||||
pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
|
||||
pos.func.dfg.replace(inst).iconst(addr_ty, 0);
|
||||
|
||||
// Split Ebb, as the trap is a terminator instruction.
|
||||
let curr_ebb = pos.current_ebb().expect("Cursor is not in an ebb");
|
||||
let new_ebb = pos.func.dfg.make_ebb();
|
||||
pos.insert_ebb(new_ebb);
|
||||
cfg.recompute_ebb(pos.func, curr_ebb);
|
||||
cfg.recompute_ebb(pos.func, new_ebb);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check `offset > limit` which is now known non-negative.
|
||||
let limit = bound - size;
|
||||
|
||||
// We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
|
||||
// more.
|
||||
if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
|
||||
let oob = if limit & 1 == 1 {
|
||||
// Prefer testing `offset >= limit - 1` when limit is odd because an even number is
|
||||
// likely to be a convenient constant on ARM and other RISC architectures.
|
||||
pos.ins().icmp_imm(
|
||||
IntCC::UnsignedGreaterThanOrEqual,
|
||||
offset,
|
||||
limit - 1,
|
||||
)
|
||||
} else {
|
||||
pos.ins().icmp_imm(
|
||||
IntCC::UnsignedGreaterThan,
|
||||
offset,
|
||||
limit,
|
||||
)
|
||||
};
|
||||
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
|
||||
}
|
||||
|
||||
/// Emit code for the base address computation of a `heap_addr` instruction.
|
||||
///
|
||||
///
|
||||
fn offset_addr(
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
addr_ty: ir::Type,
|
||||
mut offset: ir::Value,
|
||||
offset_ty: ir::Type,
|
||||
func: &mut ir::Function,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// Convert `offset` to `addr_ty`.
|
||||
if offset_ty != addr_ty {
|
||||
offset = pos.ins().uextend(addr_ty, offset);
|
||||
}
|
||||
|
||||
// Add the heap base address base
|
||||
match pos.func.heaps[heap].base {
|
||||
ir::HeapBase::ReservedReg => unimplemented!(),
|
||||
ir::HeapBase::GlobalVar(base_gv) => {
|
||||
let base_addr = pos.ins().global_addr(addr_ty, base_gv);
|
||||
let mut mflags = MemFlags::new();
|
||||
// The base address variable is requied to be accessible and aligned.
|
||||
mflags.set_notrap();
|
||||
mflags.set_aligned();
|
||||
let base = pos.ins().load(addr_ty, mflags, base_addr, 0);
|
||||
pos.func.dfg.replace(inst).iadd(base, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
63
lib/codegen/src/legalizer/libcall.rs
Normal file
63
lib/codegen/src/legalizer/libcall.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Expanding instructions as runtime library calls.
|
||||
|
||||
use ir;
|
||||
use ir::InstBuilder;
|
||||
|
||||
/// Try to expand `inst` as a library call, returning true is successful.
|
||||
pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function) -> bool {
|
||||
// Does the opcode/ctrl_type combo even have a well-known runtime library name.
|
||||
let libcall =
|
||||
match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst)) {
|
||||
Some(lc) => lc,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
let funcref = find_funcref(libcall, func).unwrap_or_else(|| make_funcref(libcall, inst, func));
|
||||
|
||||
// Now we convert `inst` to a call. First save the arguments.
|
||||
let mut args = vec![];
|
||||
args.extend_from_slice(func.dfg.inst_args(inst));
|
||||
// The replace builder will preserve the instruction result values.
|
||||
func.dfg.replace(inst).call(funcref, &args);
|
||||
|
||||
// TODO: ask the ISA to legalize the signature.
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Get the existing function reference for `libcall` in `func` if it exists.
|
||||
fn find_funcref(libcall: ir::LibCall, func: &ir::Function) -> Option<ir::FuncRef> {
|
||||
// We're assuming that all libcall function decls are at the end.
|
||||
// If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
|
||||
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
|
||||
match func_data.name {
|
||||
ir::ExternalName::LibCall(lc) => {
|
||||
if lc == libcall {
|
||||
return Some(fref);
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Create a funcref for `libcall` with a signature matching `inst`.
|
||||
fn make_funcref(libcall: ir::LibCall, inst: ir::Inst, func: &mut ir::Function) -> ir::FuncRef {
|
||||
// Start with a system_v calling convention. We'll give the ISA a chance to change it.
|
||||
let mut sig = ir::Signature::new(ir::CallConv::SystemV);
|
||||
for &v in func.dfg.inst_args(inst) {
|
||||
sig.params.push(ir::AbiParam::new(func.dfg.value_type(v)));
|
||||
}
|
||||
for &v in func.dfg.inst_results(inst) {
|
||||
sig.returns.push(ir::AbiParam::new(func.dfg.value_type(v)));
|
||||
}
|
||||
let sigref = func.import_signature(sig);
|
||||
|
||||
// TODO: Can libcalls be colocated in some circumstances?
|
||||
func.import_function(ir::ExtFuncData {
|
||||
name: ir::ExternalName::LibCall(libcall),
|
||||
signature: sigref,
|
||||
colocated: false,
|
||||
})
|
||||
}
|
||||
302
lib/codegen/src/legalizer/mod.rs
Normal file
302
lib/codegen/src/legalizer/mod.rs
Normal file
@@ -0,0 +1,302 @@
|
||||
//! Legalize instructions.
|
||||
//!
|
||||
//! A legal instruction is one that can be mapped directly to a machine code instruction for the
|
||||
//! target ISA. The `legalize_function()` function takes as input any function and transforms it
|
||||
//! into an equivalent function using only legal instructions.
|
||||
//!
|
||||
//! The characteristics of legal instructions depend on the target ISA, so any given instruction
|
||||
//! can be legal for one ISA and illegal for another.
|
||||
//!
|
||||
//! Besides transforming instructions, the legalizer also fills out the `function.encodings` map
|
||||
//! which provides a legal encoding recipe for every instruction.
|
||||
//!
|
||||
//! The legalizer does not deal with register allocation constraints. These constraints are derived
|
||||
//! from the encoding recipes, and solved later by the register allocator.
|
||||
|
||||
use bitset::BitSet;
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, InstBuilder};
|
||||
use isa::TargetIsa;
|
||||
use timing;
|
||||
|
||||
mod boundary;
|
||||
mod call;
|
||||
mod globalvar;
|
||||
mod heap;
|
||||
mod libcall;
|
||||
mod split;
|
||||
|
||||
use self::globalvar::expand_global_addr;
|
||||
use self::heap::expand_heap_addr;
|
||||
use self::call::expand_call;
|
||||
use self::libcall::expand_as_libcall;
|
||||
|
||||
/// Legalize `func` for `isa`.
///
/// - Transform any instructions that don't have a legal representation in `isa`.
/// - Fill out `func.encodings`.
///
pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &TargetIsa) {
    let _tt = timing::legalize();
    debug_assert!(cfg.is_valid());

    // Convert the function and call signatures to the ISA's legalized forms first; the
    // instruction loop below assumes signatures are already legal.
    boundary::legalize_signatures(func, isa);

    // Make room for an encoding entry for every instruction in the function.
    func.encodings.resize(func.dfg.num_insts());

    let mut pos = FuncCursor::new(func);

    // Process EBBs in layout order. Some legalization actions may split the current EBB or append
    // new ones to the end. We need to make sure we visit those new EBBs too.
    while let Some(_ebb) = pos.next_ebb() {
        // Keep track of the cursor position before the instruction being processed, so we can
        // double back when replacing instructions.
        let mut prev_pos = pos.position();

        while let Some(inst) = pos.next_inst() {
            let opcode = pos.func.dfg[inst].opcode();

            // Check for ABI boundaries that need to be converted to the legalized signature.
            if opcode.is_call() {
                if boundary::handle_call_abi(inst, pos.func, cfg) {
                    // Go back and legalize the inserted argument conversion instructions.
                    pos.set_position(prev_pos);
                    continue;
                }
            } else if opcode.is_return() {
                if boundary::handle_return_abi(inst, pos.func, cfg) {
                    // Go back and legalize the inserted return value conversion instructions.
                    pos.set_position(prev_pos);
                    continue;
                }
            } else if opcode.is_branch() {
                // Branch arguments may still reference split values; simplify them now that the
                // preceding instructions have been legalized.
                split::simplify_branch_arguments(&mut pos.func.dfg, inst);
            }

            match pos.func.update_encoding(inst, isa) {
                Ok(()) => {}
                Err(action) => {
                    // We should transform the instruction into legal equivalents.
                    let changed = action(inst, pos.func, cfg, isa);
                    // If the current instruction was replaced, we need to double back and revisit
                    // the expanded sequence. This is both to assign encodings and possible to
                    // expand further.
                    // There's a risk of infinite looping here if the legalization patterns are
                    // unsound. Should we attempt to detect that?
                    if changed {
                        pos.set_position(prev_pos);
                        continue;
                    }

                    // We don't have any pattern expansion for this instruction either.
                    // Try converting it to a library call as a last resort.
                    if expand_as_libcall(inst, pos.func) {
                        pos.set_position(prev_pos);
                        continue;
                    }
                }
            }

            // Remember this position in case we need to double back.
            prev_pos = pos.position();
        }
    }
}
|
||||
|
||||
// Include legalization patterns that were generated by `gen_legalizer.py` from the `XForms` in
|
||||
// `lib/codegen/meta/base/legalize.py`.
|
||||
//
|
||||
// Concretely, this defines private functions `narrow()`, and `expand()`.
|
||||
include!(concat!(env!("OUT_DIR"), "/legalizer.rs"));
|
||||
|
||||
/// Custom expansion for conditional trap instructions.
/// TODO: Add CFG support to the Python patterns so we won't have to do this.
///
/// Rewrites `trapz`/`trapnz` into a conditional branch over an unconditional `trap`,
/// splitting the EBB after the instruction.
fn expand_cond_trap(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &TargetIsa,
) {
    // Parse the instruction.
    let trapz;
    let (arg, code) = match func.dfg[inst] {
        ir::InstructionData::CondTrap { opcode, arg, code } => {
            // We want to branch *over* an unconditional trap.
            trapz = match opcode {
                ir::Opcode::Trapz => true,
                ir::Opcode::Trapnz => false,
                _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
            };
            (arg, code)
        }
        _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
    };

    // Split the EBB after `inst`:
    //
    //     trapnz arg
    //
    // Becomes:
    //
    //     brz arg, new_ebb
    //     trap
    //   new_ebb:
    //
    let old_ebb = func.layout.pp_ebb(inst);
    let new_ebb = func.dfg.make_ebb();
    // The branch condition is inverted relative to the trap condition: we branch past the
    // trap when the trap should NOT fire.
    if trapz {
        func.dfg.replace(inst).brnz(arg, new_ebb, &[]);
    } else {
        func.dfg.replace(inst).brz(arg, new_ebb, &[]);
    }

    let mut pos = FuncCursor::new(func).after_inst(inst);
    // Preserve the original source location on the inserted trap.
    pos.use_srcloc(inst);
    pos.ins().trap(code);
    pos.insert_ebb(new_ebb);

    // Finally update the CFG.
    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, new_ebb);
}
|
||||
|
||||
/// Jump tables.
///
/// Expand `br_table` into a linear sequence of compare-and-branch instructions, one per
/// populated table entry, falling through when nothing matches.
fn expand_br_table(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &TargetIsa,
) {
    use ir::condcodes::IntCC;

    let (arg, table) = match func.dfg[inst] {
        ir::InstructionData::BranchTable {
            opcode: ir::Opcode::BrTable,
            arg,
            table,
        } => (arg, table),
        _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)),
    };

    // This is a poor man's jump table using just a sequence of conditional branches.
    // TODO: Lower into a jump table load and indirect branch.
    let table_size = func.jump_tables[table].len();
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    for i in 0..table_size {
        // Sparse tables may have holes; only emit a branch for populated entries.
        if let Some(dest) = pos.func.jump_tables[table].get_entry(i) {
            let t = pos.ins().icmp_imm(IntCC::Equal, arg, i as i64);
            pos.ins().brnz(t, dest, &[]);
        }
    }

    // `br_table` falls through when nothing matches.
    let ebb = pos.current_ebb().unwrap();
    pos.remove_inst();
    cfg.recompute_ebb(pos.func, ebb);
}
|
||||
|
||||
/// Expand the select instruction.
///
/// Conditional moves are available in some ISAs for some register classes. The remaining selects
/// are handled by a branch.
fn expand_select(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &TargetIsa,
) {
    let (ctrl, tval, fval) = match func.dfg[inst] {
        ir::InstructionData::Ternary {
            opcode: ir::Opcode::Select,
            args,
        } => (args[0], args[1], args[2]),
        _ => panic!("Expected select: {}", func.dfg.display_inst(inst, None)),
    };

    // Replace `result = select ctrl, tval, fval` with:
    //
    //   brnz ctrl, new_ebb(tval)
    //   jump new_ebb(fval)
    // new_ebb(result):
    let old_ebb = func.layout.pp_ebb(inst);
    let result = func.dfg.first_result(inst);
    // Detach the result from the select so it can become the new EBB's parameter instead.
    func.dfg.clear_results(inst);
    let new_ebb = func.dfg.make_ebb();
    func.dfg.attach_ebb_param(new_ebb, result);

    func.dfg.replace(inst).brnz(ctrl, new_ebb, &[tval]);
    let mut pos = FuncCursor::new(func).after_inst(inst);
    pos.use_srcloc(inst);
    pos.ins().jump(new_ebb, &[fval]);
    pos.insert_ebb(new_ebb);

    // Both EBBs changed shape, so recompute their predecessor info.
    cfg.recompute_ebb(pos.func, new_ebb);
    cfg.recompute_ebb(pos.func, old_ebb);
}
|
||||
|
||||
/// Expand illegal `f32const` and `f64const` instructions.
///
/// The float constant is materialized as an `iconst` of the raw bit pattern followed by a
/// `bitcast` back to the float type.
fn expand_fconst(
    inst: ir::Inst,
    func: &mut ir::Function,
    _cfg: &mut ControlFlowGraph,
    _isa: &TargetIsa,
) {
    let ty = func.dfg.value_type(func.dfg.first_result(inst));
    debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty);

    // In the future, we may want to generate constant pool entries for these constants, but for
    // now use an `iconst` and a bit cast.
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
    let ival = match pos.func.dfg[inst] {
        ir::InstructionData::UnaryIeee32 {
            opcode: ir::Opcode::F32const,
            imm,
        } => pos.ins().iconst(ir::types::I32, i64::from(imm.bits())),
        ir::InstructionData::UnaryIeee64 {
            opcode: ir::Opcode::F64const,
            imm,
        } => pos.ins().iconst(ir::types::I64, imm.bits() as i64),
        _ => panic!("Expected fconst: {}", pos.func.dfg.display_inst(inst, None)),
    };
    // Reinterpret the integer bits as the original float type, reusing the old result value.
    pos.func.dfg.replace(inst).bitcast(ty, ival);
}
|
||||
|
||||
/// Expand the stack check instruction.
///
/// The global variable operand holds the address of the stack limit. The expansion loads the
/// limit, compares it against the stack pointer, and traps with `StackOverflow` when the stack
/// pointer is not above the limit.
pub fn expand_stack_check(
    inst: ir::Inst,
    func: &mut ir::Function,
    _cfg: &mut ControlFlowGraph,
    isa: &TargetIsa,
) {
    use ir::condcodes::IntCC;

    let gv = match func.dfg[inst] {
        ir::InstructionData::UnaryGlobalVar { global_var, .. } => global_var,
        _ => panic!("Want stack_check: {}", func.dfg.display_inst(inst, isa)),
    };
    // Pointer width follows the target: 64-bit ISAs use I64 addresses, otherwise I32.
    let ptr_ty = if isa.flags().is_64bit() {
        ir::types::I64
    } else {
        ir::types::I32
    };

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    let limit_addr = pos.ins().global_addr(ptr_ty, gv);

    // The limit load is marked aligned and non-trapping; the address comes from a global the
    // embedder controls.
    let mut mflags = ir::MemFlags::new();
    mflags.set_aligned();
    mflags.set_notrap();
    let limit = pos.ins().load(ptr_ty, mflags, limit_addr, 0);
    let cflags = pos.ins().ifcmp_sp(limit);
    // Trap when limit >= sp (unsigned), i.e. the stack has grown down past the limit.
    pos.func.dfg.replace(inst).trapif(
        IntCC::UnsignedGreaterThanOrEqual,
        cflags,
        ir::TrapCode::StackOverflow,
    );
}
|
||||
342
lib/codegen/src/legalizer/split.rs
Normal file
342
lib/codegen/src/legalizer/split.rs
Normal file
@@ -0,0 +1,342 @@
|
||||
//! Value splitting.
|
||||
//!
|
||||
//! Some value types are too large to fit in registers, so they need to be split into smaller parts
|
||||
//! that the ISA can operate on. There's two dimensions of splitting, represented by two
|
||||
//! complementary instruction pairs:
|
||||
//!
|
||||
//! - `isplit` and `iconcat` for splitting integer types into smaller integers.
|
||||
//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same
|
||||
//! lane types.
|
||||
//!
|
||||
//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably
|
||||
//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers.
|
||||
//! This breakdown is handled by the ABI lowering.
|
||||
//!
|
||||
//! When legalizing a single instruction, it is wrapped in splits and concatenations:
|
||||
//!
|
||||
//!```cton
|
||||
//! v1 = bxor.i64 v2, v3
|
||||
//! ```
|
||||
//!
|
||||
//! becomes:
|
||||
//!
|
||||
//!```cton
|
||||
//! v20, v21 = isplit v2
|
||||
//! v30, v31 = isplit v3
|
||||
//! v10 = bxor.i32 v20, v30
|
||||
//! v11 = bxor.i32 v21, v31
|
||||
//! v1 = iconcat v10, v11
|
||||
//! ```
|
||||
//!
|
||||
//! This local expansion approach still leaves the original `i64` values in the code as operands on
|
||||
//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as
|
||||
//! values are constantly split and concatenated.
|
||||
//!
|
||||
//! # Optimized splitting
|
||||
//!
|
||||
//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value,
|
||||
//! first check if the value is defined by the corresponding concatenation. If so, then just use
|
||||
//! the two concatenation inputs directly:
|
||||
//!
|
||||
//! ```cton
|
||||
//! v4 = iadd_imm.i64 v1, 1
|
||||
//! ```
|
||||
//!
|
||||
//! becomes, using the expanded code from above:
|
||||
//!
|
||||
//! ```cton
|
||||
//! v40, v5 = iadd_imm_cout.i32 v10, 1
|
||||
//! v6 = bint.i32
|
||||
//! v41 = iadd.i32 v11, v6
|
||||
//! v4 = iconcat v40, v41
|
||||
//! ```
|
||||
//!
|
||||
//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they
|
||||
//! can be trivially deleted by a dead code elimination pass.
|
||||
//!
|
||||
//! # EBB arguments
|
||||
//!
|
||||
//! If all instructions that produce an `i64` value are legalized as above, we will eventually end
|
||||
//! up with no `i64` values anywhere, except for EBB arguments. We can work around this by
|
||||
//! iteratively splitting EBB arguments too. That should leave us with no illegal value types
|
||||
//! anywhere.
|
||||
//!
|
||||
//! It is possible to have circular dependencies of EBB arguments that are never used by any real
|
||||
//! instructions. These loops will remain in the program.
|
||||
|
||||
use cursor::{Cursor, CursorPosition, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, Ebb, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef};
|
||||
use std::iter;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values
/// if possible.
///
/// Returns the `(low, high)` halves of the integer value. See `split_any` for details.
pub fn isplit(
    func: &mut ir::Function,
    cfg: &ControlFlowGraph,
    pos: CursorPosition,
    srcloc: ir::SourceLoc,
    value: Value,
) -> (Value, Value) {
    split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat)
}
|
||||
|
||||
/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if
/// possible.
///
/// Returns the `(low, high)` half-vectors of the vector value. See `split_any` for details.
pub fn vsplit(
    func: &mut ir::Function,
    cfg: &ControlFlowGraph,
    pos: CursorPosition,
    srcloc: ir::SourceLoc,
    value: Value,
) -> (Value, Value) {
    split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat)
}
|
||||
|
||||
/// After splitting an EBB argument, we need to go back and fix up all of the predecessor
/// instructions. This is potentially a recursive operation, but we don't implement it recursively
/// since that could use up too much stack.
///
/// Instead, the repairs are deferred and placed on a work list in stack form.
struct Repair {
    /// The concatenation opcode (`iconcat` or `vconcat`) giving the split semantics to use.
    concat: Opcode,
    /// The argument type after splitting.
    split_type: Type,
    /// The destination EBB whose arguments have been split.
    ebb: Ebb,
    /// Number of the original EBB argument which has been replaced by the low part.
    num: usize,
    /// Number of the new EBB argument which represents the high part after the split.
    hi_num: usize,
}
|
||||
|
||||
/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode.
///
/// Splits `value` at `pos`, then drains the repair work list: every EBB whose parameter was
/// split needs each predecessor branch updated to pass the low part in place of the original
/// argument and the high part appended at the end of the argument list.
fn split_any(
    func: &mut ir::Function,
    cfg: &ControlFlowGraph,
    pos: CursorPosition,
    srcloc: ir::SourceLoc,
    value: Value,
    concat: Opcode,
) -> (Value, Value) {
    let mut repairs = Vec::new();
    let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc);
    let result = split_value(pos, value, concat, &mut repairs);

    // We have split the value requested, and now we may need to fix some EBB predecessors.
    while let Some(repair) = repairs.pop() {
        for (_, inst) in cfg.pred_iter(repair.ebb) {
            let branch_opc = pos.func.dfg[inst].opcode();
            debug_assert!(
                branch_opc.is_branch(),
                "Predecessor not a branch: {}",
                pos.func.dfg.display_inst(inst, None)
            );
            // Skip past the fixed operands (condition, etc.) to reach the EBB arguments.
            let fixed_args = branch_opc.constraints().fixed_value_arguments();
            let mut args = pos.func.dfg[inst].take_value_list().expect(
                "Branches must have value lists.",
            );
            let num_args = args.len(&pos.func.dfg.value_lists);
            // Get the old value passed to the EBB argument we're repairing.
            let old_arg = args.get(fixed_args + repair.num, &pos.func.dfg.value_lists)
                .expect("Too few branch arguments");

            // It's possible that the CFG's predecessor list has duplicates. Detect them here.
            if pos.func.dfg.value_type(old_arg) == repair.split_type {
                pos.func.dfg[inst].put_value_list(args);
                continue;
            }

            // Split the old argument, possibly causing more repairs to be scheduled.
            pos.goto_inst(inst);
            let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs);

            // The `lo` part replaces the original argument.
            *args.get_mut(fixed_args + repair.num, &mut pos.func.dfg.value_lists)
                .unwrap() = lo;

            // The `hi` part goes at the end. Since multiple repairs may have been scheduled to the
            // same EBB, there could be multiple arguments missing.
            if num_args > fixed_args + repair.hi_num {
                *args.get_mut(fixed_args + repair.hi_num, &mut pos.func.dfg.value_lists)
                    .unwrap() = hi;
            } else {
                // We need to append one or more arguments. If we're adding more than one argument,
                // there must be pending repairs on the stack that will fill in the correct values
                // instead of `hi`.
                args.extend(
                    iter::repeat(hi).take(1 + fixed_args + repair.hi_num - num_args),
                    &mut pos.func.dfg.value_lists,
                );
            }

            // Put the value list back after manipulating it.
            pos.func.dfg[inst].put_value_list(args);
        }
    }

    result
}
|
||||
|
||||
/// Split a single value using the integer or vector semantics given by the `concat` opcode.
///
/// If the value is defined by a `concat` instruction, just reuse the operand values of that
/// instruction.
///
/// Return the two new values representing the parts of `value`.
fn split_value(
    pos: &mut FuncCursor,
    value: Value,
    concat: Opcode,
    repairs: &mut Vec<Repair>,
) -> (Value, Value) {
    let value = pos.func.dfg.resolve_aliases(value);
    // When set, `reuse` holds a (lo, hi) pair found without inserting a split instruction.
    let mut reuse = None;

    match pos.func.dfg.value_def(value) {
        ValueDef::Result(inst, num) => {
            // This is an instruction result. See if the value was created by a `concat`
            // instruction.
            if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] {
                debug_assert_eq!(num, 0);
                if opcode == concat {
                    reuse = Some((args[0], args[1]));
                }
            }
        }
        ValueDef::Param(ebb, num) => {
            // This is an EBB parameter. We can split the parameter value unless this is the entry
            // block.
            if pos.func.layout.entry_block() != Some(ebb) {
                // We are going to replace the parameter at `num` with two new arguments.
                // Determine the new value types.
                let ty = pos.func.dfg.value_type(value);
                let split_type = match concat {
                    Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"),
                    Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"),
                    _ => panic!("Unhandled concat opcode: {}", concat),
                };

                // Since the `repairs` stack potentially contains other parameter numbers for
                // `ebb`, avoid shifting and renumbering EBB parameters. It could invalidate other
                // `repairs` entries.
                //
                // Replace the original `value` with the low part, and append the high part at the
                // end of the argument list.
                let lo = pos.func.dfg.replace_ebb_param(value, split_type);
                let hi_num = pos.func.dfg.num_ebb_params(ebb);
                let hi = pos.func.dfg.append_ebb_param(ebb, split_type);
                reuse = Some((lo, hi));

                // Now the original value is dangling. Insert a concatenation instruction that can
                // compute it from the two new parameters. This also serves as a record of what we
                // did so a future call to this function doesn't have to redo the work.
                //
                // Note that it is safe to move `pos` here since `reuse` was set above, so we don't
                // need to insert a split instruction before returning.
                pos.goto_first_inst(ebb);
                pos.ins().with_result(value).Binary(
                    concat,
                    split_type,
                    lo,
                    hi,
                );

                // Finally, splitting the EBB parameter is not enough. We also have to repair all
                // of the predecessor instructions that branch here.
                add_repair(concat, split_type, ebb, num, hi_num, repairs);
            }
        }
    }

    // Did the code above succeed in finding values we can reuse?
    if let Some(pair) = reuse {
        pair
    } else {
        // No, we'll just have to insert the requested split instruction at `pos`. Note that `pos`
        // has not been moved by the EBB argument code above when `reuse` is `None`.
        match concat {
            Opcode::Iconcat => pos.ins().isplit(value),
            Opcode::Vconcat => pos.ins().vsplit(value),
            _ => panic!("Unhandled concat opcode: {}", concat),
        }
    }
}
|
||||
|
||||
// Add a repair entry to the work list.
|
||||
fn add_repair(
|
||||
concat: Opcode,
|
||||
split_type: Type,
|
||||
ebb: Ebb,
|
||||
num: usize,
|
||||
hi_num: usize,
|
||||
repairs: &mut Vec<Repair>,
|
||||
) {
|
||||
repairs.push(Repair {
|
||||
concat,
|
||||
split_type,
|
||||
ebb,
|
||||
num,
|
||||
hi_num,
|
||||
});
|
||||
}
|
||||
|
||||
/// Strip concat-split chains. Return a simpler way of computing the same value.
|
||||
///
|
||||
/// Given this input:
|
||||
///
|
||||
/// ```cton
|
||||
/// v10 = iconcat v1, v2
|
||||
/// v11, v12 = isplit v10
|
||||
/// ```
|
||||
///
|
||||
/// This function resolves `v11` to `v1` and `v12` to `v2`.
|
||||
fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value {
|
||||
let value = dfg.resolve_aliases(value);
|
||||
|
||||
// Deconstruct a split instruction.
|
||||
let split_res;
|
||||
let concat_opc;
|
||||
let split_arg;
|
||||
if let ValueDef::Result(inst, num) = dfg.value_def(value) {
|
||||
split_res = num;
|
||||
concat_opc = match dfg[inst].opcode() {
|
||||
Opcode::Isplit => Opcode::Iconcat,
|
||||
Opcode::Vsplit => Opcode::Vconcat,
|
||||
_ => return value,
|
||||
};
|
||||
split_arg = dfg.inst_args(inst)[0];
|
||||
} else {
|
||||
return value;
|
||||
}
|
||||
|
||||
// See if split_arg is defined by a concatenation instruction.
|
||||
if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) {
|
||||
if dfg[inst].opcode() == concat_opc {
|
||||
return dfg.inst_args(inst)[split_res];
|
||||
}
|
||||
}
|
||||
|
||||
value
|
||||
}
|
||||
|
||||
/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been
|
||||
/// legalized.
|
||||
///
|
||||
/// The branch argument repairs performed by `split_any()` above may be performed on branches that
|
||||
/// have not yet been legalized. The repaired arguments can be defined by actual split
|
||||
/// instructions in that case.
|
||||
///
|
||||
/// After legalizing the instructions computing the value that was split, it is likely that we can
|
||||
/// avoid depending on the split instruction. Its input probably comes from a concatenation.
|
||||
pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) {
|
||||
let mut new_args = Vec::new();
|
||||
|
||||
for &arg in dfg.inst_args(branch) {
|
||||
let new_arg = resolve_splits(dfg, arg);
|
||||
new_args.push(new_arg);
|
||||
}
|
||||
|
||||
dfg.inst_args_mut(branch).copy_from_slice(&new_args);
|
||||
}
|
||||
111
lib/codegen/src/lib.rs
Normal file
111
lib/codegen/src/lib.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! Cretonne code generation library.
|
||||
|
||||
#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
|
||||
#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
|
||||
#![cfg_attr(feature="cargo-clippy", allow(
|
||||
// Rustfmt 0.9.0 is at odds with this lint:
|
||||
block_in_if_condition_stmt,
|
||||
// Produces only a false positive:
|
||||
while_let_loop,
|
||||
// Produces many false positives, but did produce some valid lints, now fixed:
|
||||
needless_lifetimes,
|
||||
// Generated code makes some style transgressions, but readability doesn't suffer much:
|
||||
many_single_char_names,
|
||||
identity_op,
|
||||
needless_borrow,
|
||||
cast_lossless,
|
||||
unreadable_literal,
|
||||
assign_op_pattern,
|
||||
empty_line_after_outer_attr,
|
||||
// Hard to avoid in generated code:
|
||||
cyclomatic_complexity,
|
||||
too_many_arguments,
|
||||
// Code generator doesn't have a way to collapse identical arms:
|
||||
match_same_arms,
|
||||
// These are relatively minor style issues, but would be easy to fix:
|
||||
new_without_default,
|
||||
new_without_default_derive,
|
||||
should_implement_trait,
|
||||
redundant_field_names,
|
||||
useless_let_if_seq,
|
||||
len_without_is_empty))]
|
||||
|
||||
// Turns on no_std and alloc features if std is not available.
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
#![cfg_attr(not(feature = "std"), feature(alloc))]
|
||||
|
||||
// Include the `hashmap_core` crate if std is not available.
|
||||
#[allow(unused_extern_crates)]
|
||||
#[cfg(not(feature = "std"))]
|
||||
extern crate hashmap_core;
|
||||
#[cfg(not(feature = "std"))]
|
||||
#[macro_use]
|
||||
extern crate alloc;
|
||||
extern crate failure;
|
||||
#[macro_use]
|
||||
extern crate failure_derive;
|
||||
|
||||
pub use context::Context;
|
||||
pub use legalizer::legalize_function;
|
||||
pub use verifier::verify_function;
|
||||
pub use write::write_function;
|
||||
|
||||
/// Version number of the cretonne-codegen crate.
|
||||
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
#[macro_use]
|
||||
pub extern crate cretonne_entity as entity;
|
||||
|
||||
#[macro_use]
|
||||
pub mod dbg;
|
||||
|
||||
pub mod bforest;
|
||||
pub mod binemit;
|
||||
pub mod cfg_printer;
|
||||
pub mod cursor;
|
||||
pub mod dominator_tree;
|
||||
pub mod flowgraph;
|
||||
pub mod ir;
|
||||
pub mod isa;
|
||||
pub mod loop_analysis;
|
||||
pub mod print_errors;
|
||||
pub mod result;
|
||||
pub mod settings;
|
||||
pub mod timing;
|
||||
pub mod verifier;
|
||||
|
||||
pub use entity::packed_option;
|
||||
|
||||
mod abi;
|
||||
mod bitset;
|
||||
mod constant_hash;
|
||||
mod context;
|
||||
mod dce;
|
||||
mod divconst_magic_numbers;
|
||||
mod iterators;
|
||||
mod legalizer;
|
||||
mod licm;
|
||||
mod partition_slice;
|
||||
mod postopt;
|
||||
mod predicates;
|
||||
mod preopt;
|
||||
mod ref_slice;
|
||||
mod regalloc;
|
||||
mod scoped_hash_map;
|
||||
mod simple_gvn;
|
||||
mod stack_layout;
|
||||
mod topo_order;
|
||||
mod unreachable_code;
|
||||
mod write;
|
||||
|
||||
/// This replaces `std` in builds with `core`.
#[cfg(not(feature = "std"))]
mod std {
    // Re-export everything from `core` so existing `std::` paths keep resolving.
    pub use core::*;
    // Heap-backed containers come from the `alloc` crate in no_std builds.
    pub use alloc::{boxed, vec, string};
    // Hash containers are not in `core`/`alloc`; `hashmap_core` supplies them.
    pub mod collections {
        pub use hashmap_core::{HashMap, HashSet};
        pub use hashmap_core::map as hash_map;
        pub use alloc::BTreeSet;
    }
}
|
||||
220
lib/codegen/src/licm.rs
Normal file
220
lib/codegen/src/licm.rs
Normal file
@@ -0,0 +1,220 @@
|
||||
//! A Loop Invariant Code Motion optimization pass
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::{EntityList, ListPool};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
|
||||
use loop_analysis::{Loop, LoopAnalysis};
|
||||
use std::collections::HashSet;
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// Performs the LICM pass by detecting loops within the CFG and moving
/// loop-invariant instructions out of them.
/// Changes the CFG and domtree in-place during the operation.
pub fn do_licm(
    func: &mut Function,
    cfg: &mut ControlFlowGraph,
    domtree: &mut DominatorTree,
    loop_analysis: &mut LoopAnalysis,
) {
    let _tt = timing::licm();
    debug_assert!(cfg.is_valid());
    debug_assert!(domtree.is_valid());
    debug_assert!(loop_analysis.is_valid());

    for lp in loop_analysis.loops() {
        // For each loop that we want to optimize we determine the set of loop-invariant
        // instructions
        let invariant_insts = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
        // Then we create the loop's pre-header and fill it with the invariant instructions
        // Then we remove the invariant instructions from the loop body
        if !invariant_insts.is_empty() {
            // If the loop has a natural pre-header we use it, otherwise we create it.
            let mut pos;
            match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) {
                None => {
                    let pre_header =
                        create_pre_header(loop_analysis.loop_header(lp), func, cfg, domtree);
                    pos = FuncCursor::new(func).at_last_inst(pre_header);
                }
                // If there is a natural pre-header we insert new instructions just before the
                // related jumping instruction (which is not necessarily at the end).
                Some((_, last_inst)) => {
                    pos = FuncCursor::new(func).at_inst(last_inst);
                }
            };
            // The last instruction of the pre-header is the termination instruction (usually
            // a jump) so we need to insert just before this.
            for inst in invariant_insts {
                pos.insert_inst(inst);
            }
        }
    }
    // We have to recompute the domtree to account for the changes
    cfg.compute(func);
    domtree.compute(func, cfg);
}
|
||||
|
||||
// Insert a pre-header before the header, modifying the function layout and CFG to reflect it.
|
||||
// A jump instruction to the header is placed at the end of the pre-header.
|
||||
fn create_pre_header(
|
||||
header: Ebb,
|
||||
func: &mut Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
domtree: &DominatorTree,
|
||||
) -> Ebb {
|
||||
let pool = &mut ListPool::<Value>::new();
|
||||
let header_args_values: Vec<Value> = func.dfg.ebb_params(header).into_iter().cloned().collect();
|
||||
let header_args_types: Vec<Type> = header_args_values
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|val| func.dfg.value_type(val))
|
||||
.collect();
|
||||
let pre_header = func.dfg.make_ebb();
|
||||
let mut pre_header_args_value: EntityList<Value> = EntityList::new();
|
||||
for typ in header_args_types {
|
||||
pre_header_args_value.push(func.dfg.append_ebb_param(pre_header, typ), pool);
|
||||
}
|
||||
for (_, last_inst) in cfg.pred_iter(header) {
|
||||
// We only follow normal edges (not the back edges)
|
||||
if !domtree.dominates(header, last_inst, &func.layout) {
|
||||
change_branch_jump_destination(last_inst, pre_header, func);
|
||||
}
|
||||
}
|
||||
{
|
||||
let mut pos = FuncCursor::new(func).at_top(header);
|
||||
// Inserts the pre-header at the right place in the layout.
|
||||
pos.insert_ebb(pre_header);
|
||||
pos.next_inst();
|
||||
pos.ins().jump(header, pre_header_args_value.as_slice(pool));
|
||||
}
|
||||
pre_header
|
||||
}
|
||||
|
||||
// Detects if a loop header has a natural pre-header.
|
||||
//
|
||||
// A loop header has a pre-header if there is only one predecessor that the header doesn't
|
||||
// dominate.
|
||||
// Returns the pre-header Ebb and the instruction jumping to the header.
|
||||
fn has_pre_header(
|
||||
layout: &Layout,
|
||||
cfg: &ControlFlowGraph,
|
||||
domtree: &DominatorTree,
|
||||
header: Ebb,
|
||||
) -> Option<(Ebb, Inst)> {
|
||||
let mut result = None;
|
||||
let mut found = false;
|
||||
for (pred_ebb, last_inst) in cfg.pred_iter(header) {
|
||||
// We only count normal edges (not the back edges)
|
||||
if !domtree.dominates(header, last_inst, layout) {
|
||||
if found {
|
||||
// We have already found one, there are more than one
|
||||
return None;
|
||||
} else {
|
||||
result = Some((pred_ebb, last_inst));
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump
|
||||
// or non-branch instruction.
|
||||
fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
|
||||
match func.dfg[inst].branch_destination_mut() {
|
||||
None => (),
|
||||
Some(instruction_dest) => *instruction_dest = new_ebb,
|
||||
}
|
||||
}
|
||||
|
||||
/// Test whether the given opcode is unsafe to even consider for LICM.
|
||||
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
|
||||
opcode.can_load() || opcode.can_store() || opcode.is_call() || opcode.is_branch() ||
|
||||
opcode.is_terminator() || opcode.is_return() ||
|
||||
opcode.can_trap() || opcode.other_side_effects() || opcode.writes_cpu_flags()
|
||||
}
|
||||
|
||||
/// Test whether the given instruction is loop-invariant.
|
||||
fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &HashSet<Value>) -> bool {
|
||||
if trivially_unsafe_for_licm(dfg[inst].opcode()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let inst_args = dfg.inst_args(inst);
|
||||
for arg in inst_args {
|
||||
let arg = dfg.resolve_aliases(*arg);
|
||||
if loop_values.contains(&arg) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Traverses a loop in reverse post-order from a header EBB and identify loop-invariant
// instructions. These loop-invariant instructions are then removed from the code and returned
// (in reverse post-order) for later use.
fn remove_loop_invariant_instructions(
    lp: Loop,
    func: &mut Function,
    cfg: &ControlFlowGraph,
    loop_analysis: &LoopAnalysis,
) -> Vec<Inst> {
    // Values defined inside the loop; an instruction using any of these is not invariant.
    let mut loop_values: HashSet<Value> = HashSet::new();
    let mut invariant_insts: Vec<Inst> = Vec::new();
    let mut pos = FuncCursor::new(func);
    // We traverse the loop EBB in reverse post-order.
    for ebb in postorder_ebbs_loop(loop_analysis, cfg, lp).iter().rev() {
        // Arguments of the EBB are loop values
        for val in pos.func.dfg.ebb_params(*ebb) {
            loop_values.insert(*val);
        }
        pos.goto_top(*ebb);
        #[cfg_attr(feature = "cargo-clippy", allow(block_in_if_condition_stmt))]
        'next_inst: while let Some(inst) = pos.next_inst() {
            if is_loop_invariant(inst, &pos.func.dfg, &loop_values) {
                // If all the instruction's argument are defined outside the loop
                // then this instruction is loop-invariant
                invariant_insts.push(inst);
                // We remove it from the loop; stepping back keeps the cursor valid so the
                // `while let` continues with the instruction after the removed one.
                pos.remove_inst_and_step_back();
            } else {
                // If the instruction is not loop-invariant we push its results in the set of
                // loop values
                for out in pos.func.dfg.inst_results(inst) {
                    loop_values.insert(*out);
                }
            }
        }
    }
    invariant_insts
}
|
||||
|
||||
/// Return ebbs from a loop in post-order, starting from an entry point in the block.
|
||||
fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis, cfg: &ControlFlowGraph, lp: Loop) -> Vec<Ebb> {
|
||||
let mut grey = HashSet::new();
|
||||
let mut black = HashSet::new();
|
||||
let mut stack = vec![loop_analysis.loop_header(lp)];
|
||||
let mut postorder = Vec::new();
|
||||
|
||||
while !stack.is_empty() {
|
||||
let node = stack.pop().unwrap();
|
||||
if !grey.contains(&node) {
|
||||
// This is a white node. Mark it as gray.
|
||||
grey.insert(node);
|
||||
stack.push(node);
|
||||
// Get any children we've never seen before.
|
||||
for child in cfg.succ_iter(node) {
|
||||
if loop_analysis.is_in_loop(child, lp) && !grey.contains(&child) {
|
||||
stack.push(child);
|
||||
}
|
||||
}
|
||||
} else if !black.contains(&node) {
|
||||
postorder.push(node);
|
||||
black.insert(node);
|
||||
}
|
||||
}
|
||||
postorder
|
||||
}
|
||||
342
lib/codegen/src/loop_analysis.rs
Normal file
342
lib/codegen/src/loop_analysis.rs
Normal file
@@ -0,0 +1,342 @@
|
||||
//! A loop analysis represented as mappings of loops to their header Ebb
|
||||
//! and parent in the loop tree.
|
||||
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::EntityMap;
|
||||
use entity::{Keys, PrimaryMap};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{Ebb, Function, Layout};
|
||||
use packed_option::PackedOption;
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// An opaque reference to a code loop.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Loop(u32);
// Generates the entity-reference boilerplate for `Loop`, displayed with the "loop" prefix.
entity_impl!(Loop, "loop");
|
||||
|
||||
/// Loop tree information for a single function.
///
/// Loops are referenced by the Loop object, and for each loop you can access its header EBB,
/// its eventual parent in the loop tree and all the EBB belonging to the loop.
pub struct LoopAnalysis {
    // All loops discovered by `compute()`.
    loops: PrimaryMap<Loop, LoopData>,
    // Maps each EBB to the innermost loop containing it, if any.
    ebb_loop_map: EntityMap<Ebb, PackedOption<Loop>>,
    // Set by `compute()`, reset by `clear()`; see `is_valid()`.
    valid: bool,
}
|
||||
|
||||
// Per-loop data: the loop's header EBB and its parent in the loop tree
// (none for a top-level loop).
struct LoopData {
    header: Ebb,
    parent: PackedOption<Loop>,
}
|
||||
|
||||
impl LoopData {
|
||||
/// Creates a `LoopData` object with the loop header and its eventual parent in the loop tree.
|
||||
pub fn new(header: Ebb, parent: Option<Loop>) -> LoopData {
|
||||
LoopData {
|
||||
header: header,
|
||||
parent: parent.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Methods for querying the loop analysis.
impl LoopAnalysis {
    /// Allocate a new blank loop analysis struct. Use `compute` to compute the loop analysis for
    /// a function.
    pub fn new() -> Self {
        Self {
            valid: false,
            loops: PrimaryMap::new(),
            ebb_loop_map: EntityMap::new(),
        }
    }

    /// Returns all the loops contained in a function.
    pub fn loops(&self) -> Keys<Loop> {
        self.loops.keys()
    }

    /// Returns the header EBB of a particular loop.
    ///
    /// The characteristic property of a loop header block is that it dominates some of its
    /// predecessors.
    pub fn loop_header(&self, lp: Loop) -> Ebb {
        self.loops[lp].header
    }

    /// Return the eventual parent of a loop in the loop tree.
    ///
    /// Returns `None` for a top-level loop.
    pub fn loop_parent(&self, lp: Loop) -> Option<Loop> {
        self.loops[lp].parent.expand()
    }

    /// Determine if an Ebb belongs to a loop by running a finger along the loop tree.
    ///
    /// Returns `true` if `ebb` is in loop `lp`.
    pub fn is_in_loop(&self, ebb: Ebb, lp: Loop) -> bool {
        let ebb_loop = self.ebb_loop_map[ebb];
        match ebb_loop.expand() {
            None => false,
            // `ebb` is in `lp` if its innermost loop is `lp` itself or a child of `lp`.
            Some(ebb_loop) => self.is_child_loop(ebb_loop, lp),
        }
    }

    /// Determines if a loop is contained in another loop.
    ///
    /// `is_child_loop(child,parent)` returns `true` if and only if `child` is a child loop of
    /// `parent` (or `child == parent`).
    pub fn is_child_loop(&self, child: Loop, parent: Loop) -> bool {
        // Walk up the loop tree from `child`, looking for `parent`.
        let mut finger = Some(child);
        while let Some(finger_loop) = finger {
            if finger_loop == parent {
                return true;
            }
            finger = self.loop_parent(finger_loop);
        }
        false
    }
}
|
||||
|
||||
impl LoopAnalysis {
    /// Detects the loops in a function. Needs the control flow graph and the dominator tree.
    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph, domtree: &DominatorTree) {
        let _tt = timing::loop_analysis();
        self.loops.clear();
        self.ebb_loop_map.clear();
        self.ebb_loop_map.resize(func.dfg.num_ebbs());
        self.find_loop_headers(cfg, domtree, &func.layout);
        self.discover_loop_blocks(cfg, domtree, &func.layout);
        self.valid = true;
    }

    /// Check if the loop analysis is in a valid state.
    ///
    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
    /// `compute()` method has been called since the last `clear()`. It does not check that the
    /// loop analysis is consistent with the CFG.
    pub fn is_valid(&self) -> bool {
        self.valid
    }

    /// Clear all the data structures contained in the loop analysis. This will leave the
    /// analysis in a similar state to a context returned by `new()` except that allocated
    /// memory will be retained.
    pub fn clear(&mut self) {
        self.loops.clear();
        self.ebb_loop_map.clear();
        self.valid = false;
    }

    // Traverses the CFG in reverse postorder and create a loop object for every EBB having a
    // back edge.
    fn find_loop_headers(
        &mut self,
        cfg: &ControlFlowGraph,
        domtree: &DominatorTree,
        layout: &Layout,
    ) {
        // We traverse the CFG in reverse postorder
        for &ebb in domtree.cfg_postorder().iter().rev() {
            for (_, pred_inst) in cfg.pred_iter(ebb) {
                // If the ebb dominates one of its predecessors it is a back edge
                if domtree.dominates(ebb, pred_inst, layout) {
                    // This ebb is a loop header, so we create its associated loop
                    let lp = self.loops.push(LoopData::new(ebb, None));
                    self.ebb_loop_map[ebb] = lp.into();
                    break;
                    // We break because we only need one back edge to identify a loop header.
                }
            }
        }
    }

    // Intended to be called after `find_loop_headers`. For each detected loop header,
    // discovers all the ebb belonging to the loop and its inner loops. After a call to this
    // function, the loop tree is fully constructed.
    fn discover_loop_blocks(
        &mut self,
        cfg: &ControlFlowGraph,
        domtree: &DominatorTree,
        layout: &Layout,
    ) {
        let mut stack: Vec<Ebb> = Vec::new();
        // We handle each loop header in reverse order, corresponding to a pseudo postorder
        // traversal of the graph.
        for lp in self.loops().rev() {
            // Seed the DFS with the sources of the back edges of this loop.
            for (pred, pred_inst) in cfg.pred_iter(self.loops[lp].header) {
                // We follow the back edges
                if domtree.dominates(self.loops[lp].header, pred_inst, layout) {
                    stack.push(pred);
                }
            }
            while let Some(node) = stack.pop() {
                let continue_dfs: Option<Ebb>;
                match self.ebb_loop_map[node].expand() {
                    None => {
                        // The node hasn't been visited yet, we tag it as part of the loop
                        self.ebb_loop_map[node] = PackedOption::from(lp);
                        continue_dfs = Some(node);
                    }
                    Some(node_loop) => {
                        // We copy the node_loop into a mutable reference passed along the while
                        let mut node_loop = node_loop;
                        // The node is part of a loop, which can be lp or an inner loop
                        let mut node_loop_parent_option = self.loops[node_loop].parent;
                        while let Some(node_loop_parent) = node_loop_parent_option.expand() {
                            if node_loop_parent == lp {
                                // We have encountered lp so we stop (already visited)
                                break;
                            } else {
                                // Climb one level towards the root of the loop tree.
                                node_loop = node_loop_parent;
                                // We lookup the parent loop
                                node_loop_parent_option = self.loops[node_loop].parent;
                            }
                        }
                        // Now node_loop_parent is either:
                        // - None and node_loop is an new inner loop of lp
                        // - Some(...) and the initial node_loop was a known inner loop of lp
                        match node_loop_parent_option.expand() {
                            Some(_) => continue_dfs = None,
                            None => {
                                if node_loop != lp {
                                    // Attach the newly found inner loop to lp and keep
                                    // exploring from its header.
                                    self.loops[node_loop].parent = lp.into();
                                    continue_dfs = Some(self.loops[node_loop].header)
                                } else {
                                    // If lp is a one-block loop then we make sure we stop
                                    continue_dfs = None
                                }
                            }
                        }
                    }
                }
                // Now we have handled the popped node and need to continue the DFS by adding the
                // predecessors of that node
                if let Some(continue_dfs) = continue_dfs {
                    for (pred, _) in cfg.pred_iter(continue_dfs) {
                        stack.push(pred)
                    }
                }
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {

    use cursor::{Cursor, FuncCursor};
    use dominator_tree::DominatorTree;
    use flowgraph::ControlFlowGraph;
    use ir::{types, Function, InstBuilder};
    use loop_analysis::{Loop, LoopAnalysis};
    use std::vec::Vec;

    // Two nested loops: ebb1/ebb2 form the inner loop, ebb0..ebb3 the outer loop.
    #[test]
    fn nested_loops_detection() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
        let ebb3 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb0, types::I32);

        {
            let mut cur = FuncCursor::new(&mut func);

            cur.insert_ebb(ebb0);
            cur.ins().jump(ebb1, &[]);

            cur.insert_ebb(ebb1);
            cur.ins().jump(ebb2, &[]);

            cur.insert_ebb(ebb2);
            // Back edge of the inner loop (ebb2 -> ebb1).
            cur.ins().brnz(cond, ebb1, &[]);
            cur.ins().jump(ebb3, &[]);

            cur.insert_ebb(ebb3);
            // Back edge of the outer loop (ebb3 -> ebb0).
            cur.ins().brnz(cond, ebb0, &[]);
        }

        let mut loop_analysis = LoopAnalysis::new();
        let mut cfg = ControlFlowGraph::new();
        let mut domtree = DominatorTree::new();
        cfg.compute(&func);
        domtree.compute(&func, &cfg);
        loop_analysis.compute(&func, &cfg, &domtree);

        let loops = loop_analysis.loops().collect::<Vec<Loop>>();
        assert_eq!(loops.len(), 2);
        assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
        assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
        assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
        assert_eq!(loop_analysis.loop_parent(loops[0]), None);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[1]), false);
        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb3, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[1]), false);
    }

    // An outer loop (header ebb0) containing two sibling inner loops
    // (headers ebb1 and ebb3) that merge at ebb5.
    #[test]
    fn complex_loop_detection() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
        let ebb3 = func.dfg.make_ebb();
        let ebb4 = func.dfg.make_ebb();
        let ebb5 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb0, types::I32);

        {
            let mut cur = FuncCursor::new(&mut func);

            cur.insert_ebb(ebb0);
            cur.ins().brnz(cond, ebb1, &[]);
            cur.ins().jump(ebb3, &[]);

            cur.insert_ebb(ebb1);
            cur.ins().jump(ebb2, &[]);

            cur.insert_ebb(ebb2);
            // Back edge of the first inner loop (ebb2 -> ebb1).
            cur.ins().brnz(cond, ebb1, &[]);
            cur.ins().jump(ebb5, &[]);

            cur.insert_ebb(ebb3);
            cur.ins().jump(ebb4, &[]);

            cur.insert_ebb(ebb4);
            // Back edge of the second inner loop (ebb4 -> ebb3).
            cur.ins().brnz(cond, ebb3, &[]);
            cur.ins().jump(ebb5, &[]);

            cur.insert_ebb(ebb5);
            // Back edge of the outer loop (ebb5 -> ebb0).
            cur.ins().brnz(cond, ebb0, &[]);
        }

        let mut loop_analysis = LoopAnalysis::new();
        let mut cfg = ControlFlowGraph::new();
        let mut domtree = DominatorTree::new();
        cfg.compute(&func);
        domtree.compute(&func, &cfg);
        loop_analysis.compute(&func, &cfg, &domtree);

        let loops = loop_analysis.loops().collect::<Vec<Loop>>();
        assert_eq!(loops.len(), 3);
        assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
        assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
        assert_eq!(loop_analysis.loop_header(loops[2]), ebb3);
        assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
        assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
        assert_eq!(loop_analysis.loop_parent(loops[0]), None);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb3, loops[2]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb4, loops[2]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb5, loops[0]), true);
    }
}
|
||||
77
lib/codegen/src/partition_slice.rs
Normal file
77
lib/codegen/src/partition_slice.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
//! Rearrange the elements in a slice according to a predicate.
|
||||
|
||||
/// Rearrange the elements of the mutable slice `s` such that elements where `p(t)` is true precede
/// the elements where `p(t)` is false.
///
/// The order of elements is not preserved, unless the slice is already partitioned.
///
/// Returns the number of elements where `p(t)` is true.
pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize
where
    F: FnMut(&T) -> bool,
{
    // Locate the first element failing the predicate; everything before it is
    // already in place. If no element fails, the slice is fully partitioned.
    let first_false = match s.iter().position(|t| !p(t)) {
        Some(idx) => idx,
        None => return s.len(),
    };

    // Scan the remainder, swapping each `true` element down to the boundary.
    // This keeps the `true` elements in their original relative order, while
    // the `false` elements may get shuffled.
    let mut boundary = first_false;
    for idx in first_false + 1..s.len() {
        if p(&s[idx]) {
            s.swap(boundary, idx);
            boundary += 1;
        }
    }

    boundary
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::partition_slice;
    use std::vec::Vec;

    // Partitions a copy of `x` by "is a multiple of 10" and checks both the resulting
    // order and that the returned count equals the number of multiples of 10.
    fn check(x: &[u32], want: &[u32]) {
        assert_eq!(x.len(), want.len());
        let want_count = want.iter().cloned().filter(|&x| x % 10 == 0).count();
        let mut v = Vec::new();
        v.extend(x.iter().cloned());
        let count = partition_slice(&mut v[..], |&x| x % 10 == 0);
        assert_eq!(v, want);
        assert_eq!(count, want_count);
    }

    #[test]
    fn empty() {
        check(&[], &[]);
    }

    #[test]
    fn singles() {
        check(&[0], &[0]);
        check(&[1], &[1]);
        check(&[10], &[10]);
    }

    #[test]
    fn doubles() {
        check(&[0, 0], &[0, 0]);
        check(&[0, 5], &[0, 5]);
        check(&[5, 0], &[0, 5]);
        check(&[5, 4], &[5, 4]);
    }

    #[test]
    fn longer() {
        check(&[1, 2, 3], &[1, 2, 3]);
        check(&[1, 2, 10], &[10, 2, 1]); // Note: 2, 1 order not required.
        check(&[1, 10, 2], &[10, 1, 2]); // Note: 1, 2 order not required.
        check(&[1, 20, 10], &[20, 10, 1]);
        check(&[1, 20, 3, 10], &[20, 10, 3, 1]);
        check(&[20, 3, 10, 1], &[20, 10, 3, 1]);
    }
}
|
||||
203
lib/codegen/src/postopt.rs
Normal file
203
lib/codegen/src/postopt.rs
Normal file
@@ -0,0 +1,203 @@
|
||||
//! A post-legalization rewriting pass.
|
||||
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use cursor::{Cursor, EncCursor};
|
||||
use ir::condcodes::{CondCode, FloatCC, IntCC};
|
||||
use ir::dfg::ValueDef;
|
||||
use ir::immediates::Imm64;
|
||||
use ir::instructions::{Opcode, ValueList};
|
||||
use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value};
|
||||
use isa::TargetIsa;
|
||||
use timing;
|
||||
|
||||
/// Information collected about a compare+branch sequence.
///
/// Produced by pattern-matching in `optimize_cpu_flags` and consumed to rewrite the
/// sequence into a flags-based one.
struct CmpBrInfo {
    /// The branch instruction.
    br_inst: Inst,
    /// The icmp, icmp_imm, or fcmp instruction.
    cmp_inst: Inst,
    /// The destination of the branch.
    destination: Ebb,
    /// The arguments of the branch.
    args: ValueList,
    /// The first argument to the comparison. The second is in the `kind` field.
    cmp_arg: Value,
    /// If the branch is `brz` rather than `brnz`, we need to invert the condition
    /// before the branch.
    invert_branch_cond: bool,
    /// The kind of comparison, and the second argument.
    kind: CmpBrKind,
}
|
||||
|
||||
// The comparison flavor of a compare+branch sequence, carrying the condition code
// and the second comparison operand (a value or an immediate).
enum CmpBrKind {
    Icmp { cond: IntCC, arg: Value },
    IcmpImm { cond: IntCC, imm: Imm64 },
    Fcmp { cond: FloatCC, arg: Value },
}
|
||||
|
||||
/// Optimize comparisons to use flags values, to avoid materializing conditions
/// in integer registers.
///
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
/// sequences.
fn optimize_cpu_flags(
    pos: &mut EncCursor,
    inst: Inst,
    last_flags_clobber: Option<Inst>,
    isa: &TargetIsa,
) {
    // Look for compare and branch patterns.
    // This code could be considerably simplified with non-lexical lifetimes.
    let info = match pos.func.dfg[inst] {
        InstructionData::Branch {
            opcode,
            destination,
            ref args,
        } => {
            // The branch condition is the first branch argument.
            let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
            let invert_branch_cond = match opcode {
                Opcode::Brz => true,
                Opcode::Brnz => false,
                _ => panic!(),
            };
            // The condition must be the direct result of a comparison instruction.
            if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
                match pos.func.dfg[cond_inst] {
                    InstructionData::IntCompare {
                        cond,
                        args: cmp_args,
                        ..
                    } => CmpBrInfo {
                        br_inst: inst,
                        cmp_inst: cond_inst,
                        destination,
                        args: args.clone(),
                        cmp_arg: cmp_args[0],
                        invert_branch_cond,
                        kind: CmpBrKind::Icmp {
                            cond,
                            arg: cmp_args[1],
                        },
                    },
                    InstructionData::IntCompareImm {
                        cond,
                        arg: cmp_arg,
                        imm: cmp_imm,
                        ..
                    } => CmpBrInfo {
                        br_inst: inst,
                        cmp_inst: cond_inst,
                        destination,
                        args: args.clone(),
                        cmp_arg,
                        invert_branch_cond,
                        kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
                    },
                    InstructionData::FloatCompare {
                        cond,
                        args: cmp_args,
                        ..
                    } => CmpBrInfo {
                        br_inst: inst,
                        cmp_inst: cond_inst,
                        destination,
                        args: args.clone(),
                        cmp_arg: cmp_args[0],
                        invert_branch_cond,
                        kind: CmpBrKind::Fcmp {
                            cond,
                            arg: cmp_args[1],
                        },
                    },
                    _ => return,
                }
            } else {
                return;
            }
        }
        // TODO: trapif, trueif, selectif, and their ff counterparts.
        _ => return,
    };

    // If any instructions clobber the flags between the comparison and the branch,
    // don't optimize them.
    if last_flags_clobber != Some(info.cmp_inst) {
        return;
    }

    // We found a compare+branch pattern. Transform it to use flags.
    // Drop the condition value (first branch argument); the rest are passed through.
    let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
    pos.goto_inst(info.cmp_inst);
    match info.kind {
        CmpBrKind::Icmp { mut cond, arg } => {
            let flags = pos.ins().ifcmp(info.cmp_arg, arg);
            // Keep a `trueif` in place of the old compare for any other users of its result.
            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
            if info.invert_branch_cond {
                cond = cond.inverse();
            }
            pos.func.dfg.replace(info.br_inst).brif(
                cond,
                flags,
                info.destination,
                &args,
            );
        }
        CmpBrKind::IcmpImm { mut cond, imm } => {
            let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
            if info.invert_branch_cond {
                cond = cond.inverse();
            }
            pos.func.dfg.replace(info.br_inst).brif(
                cond,
                flags,
                info.destination,
                &args,
            );
        }
        CmpBrKind::Fcmp { mut cond, arg } => {
            let flags = pos.ins().ffcmp(info.cmp_arg, arg);
            pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
            if info.invert_branch_cond {
                cond = cond.inverse();
            }
            pos.func.dfg.replace(info.br_inst).brff(
                cond,
                flags,
                info.destination,
                &args,
            );
        }
    }
    // NOTE(review): re-encoding failures are silently discarded here (the `Result` is
    // consumed with `.is_ok()` and ignored) — confirm a failed update_encoding is
    // harmless at this point.
    pos.func.update_encoding(info.cmp_inst, isa).is_ok();
    pos.func.update_encoding(info.br_inst, isa).is_ok();
}
|
||||
|
||||
//----------------------------------------------------------------------
//
// The main post-opt pass.

/// Run the post-legalization optimization pass over `func`.
pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
    let _tt = timing::postopt();
    let mut pos = EncCursor::new(func, isa);
    while let Some(_ebb) = pos.next_ebb() {
        // Most recent flags-clobbering instruction seen in this EBB, if any.
        let mut last_flags_clobber = None;
        while let Some(inst) = pos.next_inst() {
            if isa.uses_cpu_flags() {
                // Optimize instructions to make use of flags.
                optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);

                // Track the most recent seen instruction that clobbers the flags.
                if let Some(constraints) =
                    isa.encoding_info().operand_constraints(
                        pos.func.encodings[inst],
                    )
                {
                    if constraints.clobbers_flags {
                        last_flags_clobber = Some(inst)
                    }
                }
            }
        }
    }
}
|
||||
87
lib/codegen/src/predicates.rs
Normal file
87
lib/codegen/src/predicates.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
//! Predicate functions for testing instruction fields.
|
||||
//!
|
||||
//! This module defines functions that are used by the instruction predicates defined by
|
||||
//! `lib/codegen/meta/cdsl/predicates.py` classes.
|
||||
//!
|
||||
//! The predicates that operate on integer fields use `Into<i64>` as a shared trait bound. This
|
||||
//! bound is implemented by all the native integer types as well as `Imm64`.
|
||||
//!
|
||||
//! Some of these predicates may be unused in certain ISA configurations, so we suppress the
|
||||
//! dead code warning.
|
||||
|
||||
use ir;
|
||||
|
||||
/// Check that `x` is the same as `y`, after converting `y` into `x`'s type.
#[allow(dead_code)]
pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
    let y_as_t: T = y.into();
    y_as_t == x
}
|
||||
|
||||
/// Check that `x` can be represented as a `wd`-bit signed integer with `sc` low zero bits.
#[allow(dead_code)]
pub fn is_signed_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
    let s = x.into();
    // Round-trip `s` through a truncated representation: `>> sc` requires the low `sc`
    // bits to be zero to survive, and the `<< (64 - wd + sc)` / `>> (64 - wd)` pair
    // (arithmetic shift) sign-extends from bit `wd - 1`. The value is unchanged by the
    // round trip iff it satisfies both constraints.
    // NOTE(review): assumes `0 < wd <= 64` and `sc < wd`; other values over-shift and
    // would panic in debug builds — confirm callers guarantee this.
    s == (s >> sc << (64 - wd + sc) >> (64 - wd))
}
|
||||
|
||||
/// Check that `x` can be represented as a `wd`-bit unsigned integer with `sc` low zero bits.
#[allow(dead_code)]
pub fn is_unsigned_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
    let u = x.into() as u64;
    // Bit-mask of the permitted bits: bits `sc..wd` set, everything else clear.
    // NOTE(review): `1 << wd` over-shifts when `wd == 64` (panics in debug builds);
    // confirm callers never pass a full 64-bit width.
    let m = (1 << wd) - (1 << sc);
    u == (u & m)
}
|
||||
|
||||
/// Check that the external function referenced by `func_ref` is marked `colocated`
/// in `func`'s external-function table.
#[allow(dead_code)]
pub fn is_colocated_func(func_ref: ir::FuncRef, func: &ir::Function) -> bool {
    func.dfg.ext_funcs[func_ref].colocated
}
|
||||
|
||||
/// Check that the global variable `global_var` is marked `colocated`.
///
/// # Panics
///
/// Panics if the global variable does not have a symbolic (`Sym`) address, since
/// `colocated` is only meaningful for those.
#[allow(dead_code)]
pub fn is_colocated_data(global_var: ir::GlobalVar, func: &ir::Function) -> bool {
    match func.global_vars[global_var] {
        ir::GlobalVarData::Sym { colocated, .. } => colocated,
        _ => panic!("is_colocated_data only makes sense for data with symbolic addresses"),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Exercise the signed/unsigned range predicates with native `u32` inputs.
    #[test]
    fn cvt_u32() {
        let x1 = 0u32;
        let x2 = 1u32;
        let x3 = 0xffff_fff0u32;

        assert!(is_signed_int(x1, 1, 0));
        assert!(is_signed_int(x1, 2, 1));
        assert!(is_signed_int(x2, 2, 0));
        assert!(!is_signed_int(x2, 2, 1));

        // `u32` doesn't sign-extend when converted to `i64`.
        assert!(!is_signed_int(x3, 8, 0));

        assert!(is_unsigned_int(x1, 1, 0));
        assert!(is_unsigned_int(x1, 8, 4));
        assert!(is_unsigned_int(x2, 1, 0));
        assert!(!is_unsigned_int(x2, 8, 4));
        assert!(!is_unsigned_int(x3, 1, 0));
        assert!(is_unsigned_int(x3, 32, 4));
    }

    // Exercise the predicates through the `Imm64` implementation of `Into<i64>`.
    #[test]
    fn cvt_imm64() {
        use ir::immediates::Imm64;

        let x1 = Imm64::new(-8);
        let x2 = Imm64::new(8);

        assert!(is_signed_int(x1, 16, 2));
        assert!(is_signed_int(x2, 16, 2));
        assert!(!is_signed_int(x1, 16, 4));
        assert!(!is_signed_int(x2, 16, 4));
    }
}
|
||||
574
lib/codegen/src/preopt.rs
Normal file
574
lib/codegen/src/preopt.rs
Normal file
@@ -0,0 +1,574 @@
|
||||
//! A pre-legalization rewriting pass.
|
||||
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use divconst_magic_numbers::{MS32, MS64, MU32, MU64};
|
||||
use divconst_magic_numbers::{magicS32, magicS64, magicU32, magicU64};
|
||||
use ir::Inst;
|
||||
use ir::dfg::ValueDef;
|
||||
use ir::instructions::Opcode;
|
||||
use ir::types::{I32, I64};
|
||||
use ir::{DataFlowGraph, Function, InstBuilder, InstructionData, Type, Value};
|
||||
use timing;
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
//
|
||||
// Pattern-match helpers and transformation for div and rem by constants.
|
||||
|
||||
// Simple math helpers
|
||||
|
||||
/// If `x` is a power of two, or the negation thereof, return the power along
/// with a boolean that indicates whether `x` is negative. Else return None.
#[inline]
fn isPowerOf2_S32(x: i32) -> Option<(bool, u32)> {
    // We have to special-case the minimum value because abs(x) isn't representable.
    // Using `i32::min_value()` avoids the out-of-range literal `-0x8000_0000`,
    // which trips the `overflowing_literals` lint.
    if x == i32::min_value() {
        return Some((true, 31));
    }
    let abs_x = i32::wrapping_abs(x) as u32;
    if abs_x.is_power_of_two() {
        return Some((x < 0, abs_x.trailing_zeros()));
    }
    None
}
|
||||
|
||||
/// Same comments as for `isPowerOf2_S32` apply, but for 64-bit values.
#[inline]
fn isPowerOf2_S64(x: i64) -> Option<(bool, u32)> {
    // We have to special-case the minimum value because abs(x) isn't representable.
    // Using `i64::min_value()` avoids the out-of-range literal
    // `-0x8000_0000_0000_0000`, which trips the `overflowing_literals` lint.
    if x == i64::min_value() {
        return Some((true, 63));
    }
    let abs_x = i64::wrapping_abs(x) as u64;
    if abs_x.is_power_of_two() {
        return Some((x < 0, abs_x.trailing_zeros()));
    }
    None
}
|
||||
|
||||
// One variant per (operation, signedness, width) combination handled by the
// div/rem-by-constant transformation.
#[derive(Debug)]
enum DivRemByConstInfo {
    DivU32(Value, u32), // In all cases, the arguments are:
    DivU64(Value, u64), // left operand, right operand
    DivS32(Value, i32),
    DivS64(Value, i64),
    RemU32(Value, u32),
    RemU64(Value, u64),
    RemS32(Value, i32),
    RemS64(Value, i64),
}
|
||||
|
||||
/// Possibly create a DivRemByConstInfo from the given components, by
/// figuring out which, if any, of the 8 cases apply, and also taking care to
/// sanity-check the immediate.
fn package_up_divrem_info(
    argL: Value,
    argL_ty: Type,
    argRs: i64,
    isSigned: bool,
    isRem: bool,
) -> Option<DivRemByConstInfo> {
    // Reinterpret the raw immediate bits as unsigned for the range checks below.
    let argRu: u64 = argRs as u64;
    if !isSigned && argL_ty == I32 && argRu < 0x1_0000_0000 {
        // Unsigned 32-bit: the immediate must fit in 32 bits.
        let con = if isRem {
            DivRemByConstInfo::RemU32
        } else {
            DivRemByConstInfo::DivU32
        };
        return Some(con(argL, argRu as u32));
    }
    if !isSigned && argL_ty == I64 {
        // unsigned 64, no range constraint
        let con = if isRem {
            DivRemByConstInfo::RemU64
        } else {
            DivRemByConstInfo::DivU64
        };
        return Some(con(argL, argRu));
    }
    if isSigned && argL_ty == I32 && (argRu <= 0x7fff_ffff || argRu >= 0xffff_ffff_8000_0000) {
        // Signed 32-bit: the immediate must be the sign-extension of some i32.
        let con = if isRem {
            DivRemByConstInfo::RemS32
        } else {
            DivRemByConstInfo::DivS32
        };
        return Some(con(argL, argRu as i32));
    }
    if isSigned && argL_ty == I64 {
        // signed 64, no range constraint
        let con = if isRem {
            DivRemByConstInfo::RemS64
        } else {
            DivRemByConstInfo::DivS64
        };
        return Some(con(argL, argRu as i64));
    }
    None
}
|
||||
|
||||
/// Examine `idata` to see if it is a div or rem by a constant, and if so
|
||||
/// return the operands, signedness, operation size and div-vs-rem-ness in a
|
||||
/// handy bundle.
|
||||
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
|
||||
let idata: &InstructionData = &dfg[inst];
|
||||
|
||||
if let InstructionData::BinaryImm { opcode, arg, imm } = *idata {
|
||||
let (isSigned, isRem) = match opcode {
|
||||
Opcode::UdivImm => (false, false),
|
||||
Opcode::UremImm => (false, true),
|
||||
Opcode::SdivImm => (true, false),
|
||||
Opcode::SremImm => (true, true),
|
||||
_other => return None,
|
||||
};
|
||||
// Pull the operation size (type) from the left arg
|
||||
let argL_ty = dfg.value_type(arg);
|
||||
return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Actually do the transformation given a bundle containing the relevant
/// information. `divrem_info` describes a div or rem by a constant, that
/// `pos` currently points at, and `inst` is the associated instruction.
/// `inst` is replaced by a sequence of other operations that calculate the
/// same result. Note that there are various `divrem_info` cases where we
/// cannot do any transformation, in which case `inst` is left unchanged.
///
/// The non-power-of-2 cases use the magic-number multiplication technique
/// (see `magicU32`/`magicU64`/`magicS32`/`magicS64`): multiply by a
/// precomputed constant via the high half of a widening multiply, then
/// shift and fix up.
fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) {
    // Whether this bundle describes a rem (true) or a div (false).
    let isRem = match *divrem_info {
        DivRemByConstInfo::DivU32(_, _) |
        DivRemByConstInfo::DivU64(_, _) |
        DivRemByConstInfo::DivS32(_, _) |
        DivRemByConstInfo::DivS64(_, _) => false,
        DivRemByConstInfo::RemU32(_, _) |
        DivRemByConstInfo::RemU64(_, _) |
        DivRemByConstInfo::RemS32(_, _) |
        DivRemByConstInfo::RemS64(_, _) => true,
    };

    match *divrem_info {
        // -------------------- U32 --------------------

        // U32 div, rem by zero: ignore
        DivRemByConstInfo::DivU32(_n1, 0) |
        DivRemByConstInfo::RemU32(_n1, 0) => {}

        // U32 div by 1: identity
        // U32 rem by 1: zero
        DivRemByConstInfo::DivU32(n1, 1) |
        DivRemByConstInfo::RemU32(n1, 1) => {
            if isRem {
                pos.func.dfg.replace(inst).iconst(I32, 0);
            } else {
                pos.func.dfg.replace(inst).copy(n1);
            }
        }

        // U32 div, rem by a power-of-2
        DivRemByConstInfo::DivU32(n1, d) |
        DivRemByConstInfo::RemU32(n1, d) if d.is_power_of_two() => {
            debug_assert!(d >= 2);
            // compute k where d == 2^k
            let k = d.trailing_zeros();
            debug_assert!(k >= 1 && k <= 31);
            if isRem {
                // rem: mask off the low k bits.
                let mask = (1u64 << k) - 1;
                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
            } else {
                // div: logical shift right by k.
                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
            }
        }

        // U32 div, rem by non-power-of-2
        DivRemByConstInfo::DivU32(n1, d) |
        DivRemByConstInfo::RemU32(n1, d) => {
            debug_assert!(d >= 3);
            let MU32 {
                mulBy,
                doAdd,
                shiftBy,
            } = magicU32(d);
            let qf; // final quotient
            let q0 = pos.ins().iconst(I32, mulBy as i64);
            let q1 = pos.ins().umulhi(n1, q0);
            if doAdd {
                debug_assert!(shiftBy >= 1 && shiftBy <= 32);
                let t1 = pos.ins().isub(n1, q1);
                let t2 = pos.ins().ushr_imm(t1, 1);
                let t3 = pos.ins().iadd(t2, q1);
                // I never found any case where shiftBy == 1 here.
                // So there's no attempt to fold out a zero shift.
                debug_assert_ne!(shiftBy, 1);
                qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64);
            } else {
                debug_assert!(shiftBy >= 0 && shiftBy <= 31);
                // Whereas there are known cases here for shiftBy == 0.
                if shiftBy > 0 {
                    qf = pos.ins().ushr_imm(q1, shiftBy as i64);
                } else {
                    qf = q1;
                }
            }
            // Now qf holds the final quotient. If necessary calculate the
            // remainder instead.
            if isRem {
                let tt = pos.ins().imul_imm(qf, d as i64);
                pos.func.dfg.replace(inst).isub(n1, tt);
            } else {
                pos.func.dfg.replace(inst).copy(qf);
            }
        }

        // -------------------- U64 --------------------

        // U64 div, rem by zero: ignore
        DivRemByConstInfo::DivU64(_n1, 0) |
        DivRemByConstInfo::RemU64(_n1, 0) => {}

        // U64 div by 1: identity
        // U64 rem by 1: zero
        DivRemByConstInfo::DivU64(n1, 1) |
        DivRemByConstInfo::RemU64(n1, 1) => {
            if isRem {
                pos.func.dfg.replace(inst).iconst(I64, 0);
            } else {
                pos.func.dfg.replace(inst).copy(n1);
            }
        }

        // U64 div, rem by a power-of-2
        DivRemByConstInfo::DivU64(n1, d) |
        DivRemByConstInfo::RemU64(n1, d) if d.is_power_of_two() => {
            debug_assert!(d >= 2);
            // compute k where d == 2^k
            let k = d.trailing_zeros();
            debug_assert!(k >= 1 && k <= 63);
            if isRem {
                // rem: mask off the low k bits.
                let mask = (1u64 << k) - 1;
                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
            } else {
                // div: logical shift right by k.
                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
            }
        }

        // U64 div, rem by non-power-of-2
        DivRemByConstInfo::DivU64(n1, d) |
        DivRemByConstInfo::RemU64(n1, d) => {
            debug_assert!(d >= 3);
            let MU64 {
                mulBy,
                doAdd,
                shiftBy,
            } = magicU64(d);
            let qf; // final quotient
            let q0 = pos.ins().iconst(I64, mulBy as i64);
            let q1 = pos.ins().umulhi(n1, q0);
            if doAdd {
                debug_assert!(shiftBy >= 1 && shiftBy <= 64);
                let t1 = pos.ins().isub(n1, q1);
                let t2 = pos.ins().ushr_imm(t1, 1);
                let t3 = pos.ins().iadd(t2, q1);
                // I never found any case where shiftBy == 1 here.
                // So there's no attempt to fold out a zero shift.
                debug_assert_ne!(shiftBy, 1);
                qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64);
            } else {
                debug_assert!(shiftBy >= 0 && shiftBy <= 63);
                // Whereas there are known cases here for shiftBy == 0.
                if shiftBy > 0 {
                    qf = pos.ins().ushr_imm(q1, shiftBy as i64);
                } else {
                    qf = q1;
                }
            }
            // Now qf holds the final quotient. If necessary calculate the
            // remainder instead.
            if isRem {
                let tt = pos.ins().imul_imm(qf, d as i64);
                pos.func.dfg.replace(inst).isub(n1, tt);
            } else {
                pos.func.dfg.replace(inst).copy(qf);
            }
        }

        // -------------------- S32 --------------------

        // S32 div, rem by zero or -1: ignore
        DivRemByConstInfo::DivS32(_n1, -1) |
        DivRemByConstInfo::RemS32(_n1, -1) |
        DivRemByConstInfo::DivS32(_n1, 0) |
        DivRemByConstInfo::RemS32(_n1, 0) => {}

        // S32 div by 1: identity
        // S32 rem by 1: zero
        DivRemByConstInfo::DivS32(n1, 1) |
        DivRemByConstInfo::RemS32(n1, 1) => {
            if isRem {
                pos.func.dfg.replace(inst).iconst(I32, 0);
            } else {
                pos.func.dfg.replace(inst).copy(n1);
            }
        }

        DivRemByConstInfo::DivS32(n1, d) |
        DivRemByConstInfo::RemS32(n1, d) => {
            if let Some((isNeg, k)) = isPowerOf2_S32(d) {
                // k can be 31 only in the case that d is -2^31.
                debug_assert!(k >= 1 && k <= 31);
                // Round the dividend towards zero before shifting, by adding
                // (2^k - 1) to negative values: t3 = n1 + (n1 < 0 ? 2^k-1 : 0).
                let t1 = if k - 1 == 0 {
                    n1
                } else {
                    pos.ins().sshr_imm(n1, (k - 1) as i64)
                };
                let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64);
                let t3 = pos.ins().iadd(n1, t2);
                if isRem {
                    // S32 rem by a power-of-2
                    let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64);
                    // Curiously, we don't care here what the sign of d is.
                    pos.func.dfg.replace(inst).isub(n1, t4);
                } else {
                    // S32 div by a power-of-2
                    let t4 = pos.ins().sshr_imm(t3, k as i64);
                    // Negate the quotient when the divisor was negative.
                    if isNeg {
                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                    } else {
                        pos.func.dfg.replace(inst).copy(t4);
                    }
                }
            } else {
                // S32 div, rem by a non-power-of-2
                debug_assert!(d < -2 || d > 2);
                let MS32 { mulBy, shiftBy } = magicS32(d);
                let q0 = pos.ins().iconst(I32, mulBy as i64);
                let q1 = pos.ins().smulhi(n1, q0);
                // Fix up for the cases where the magic multiplier's sign
                // disagrees with the divisor's sign.
                let q2 = if d > 0 && mulBy < 0 {
                    pos.ins().iadd(q1, n1)
                } else if d < 0 && mulBy > 0 {
                    pos.ins().isub(q1, n1)
                } else {
                    q1
                };
                debug_assert!(shiftBy >= 0 && shiftBy <= 31);
                let q3 = if shiftBy == 0 {
                    q2
                } else {
                    pos.ins().sshr_imm(q2, shiftBy as i64)
                };
                // Add the sign bit to round negative quotients towards zero.
                let t1 = pos.ins().ushr_imm(q3, 31);
                let qf = pos.ins().iadd(q3, t1);
                // Now qf holds the final quotient. If necessary calculate
                // the remainder instead.
                if isRem {
                    let tt = pos.ins().imul_imm(qf, d as i64);
                    pos.func.dfg.replace(inst).isub(n1, tt);
                } else {
                    pos.func.dfg.replace(inst).copy(qf);
                }
            }
        }

        // -------------------- S64 --------------------

        // S64 div, rem by zero or -1: ignore
        DivRemByConstInfo::DivS64(_n1, -1) |
        DivRemByConstInfo::RemS64(_n1, -1) |
        DivRemByConstInfo::DivS64(_n1, 0) |
        DivRemByConstInfo::RemS64(_n1, 0) => {}

        // S64 div by 1: identity
        // S64 rem by 1: zero
        DivRemByConstInfo::DivS64(n1, 1) |
        DivRemByConstInfo::RemS64(n1, 1) => {
            if isRem {
                pos.func.dfg.replace(inst).iconst(I64, 0);
            } else {
                pos.func.dfg.replace(inst).copy(n1);
            }
        }

        DivRemByConstInfo::DivS64(n1, d) |
        DivRemByConstInfo::RemS64(n1, d) => {
            if let Some((isNeg, k)) = isPowerOf2_S64(d) {
                // k can be 63 only in the case that d is -2^63.
                debug_assert!(k >= 1 && k <= 63);
                // Same rounding-towards-zero adjustment as the S32 case above.
                let t1 = if k - 1 == 0 {
                    n1
                } else {
                    pos.ins().sshr_imm(n1, (k - 1) as i64)
                };
                let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64);
                let t3 = pos.ins().iadd(n1, t2);
                if isRem {
                    // S64 rem by a power-of-2
                    let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k));
                    // Curiously, we don't care here what the sign of d is.
                    pos.func.dfg.replace(inst).isub(n1, t4);
                } else {
                    // S64 div by a power-of-2
                    let t4 = pos.ins().sshr_imm(t3, k as i64);
                    // Negate the quotient when the divisor was negative.
                    if isNeg {
                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                    } else {
                        pos.func.dfg.replace(inst).copy(t4);
                    }
                }
            } else {
                // S64 div, rem by a non-power-of-2
                debug_assert!(d < -2 || d > 2);
                let MS64 { mulBy, shiftBy } = magicS64(d);
                let q0 = pos.ins().iconst(I64, mulBy);
                let q1 = pos.ins().smulhi(n1, q0);
                // Fix up for the cases where the magic multiplier's sign
                // disagrees with the divisor's sign.
                let q2 = if d > 0 && mulBy < 0 {
                    pos.ins().iadd(q1, n1)
                } else if d < 0 && mulBy > 0 {
                    pos.ins().isub(q1, n1)
                } else {
                    q1
                };
                debug_assert!(shiftBy >= 0 && shiftBy <= 63);
                let q3 = if shiftBy == 0 {
                    q2
                } else {
                    pos.ins().sshr_imm(q2, shiftBy as i64)
                };
                // Add the sign bit to round negative quotients towards zero.
                let t1 = pos.ins().ushr_imm(q3, 63);
                let qf = pos.ins().iadd(q3, t1);
                // Now qf holds the final quotient. If necessary calculate
                // the remainder instead.
                if isRem {
                    let tt = pos.ins().imul_imm(qf, d);
                    pos.func.dfg.replace(inst).isub(n1, tt);
                } else {
                    pos.func.dfg.replace(inst).copy(qf);
                }
            }
        }
    }
}
|
||||
|
||||
/// Apply basic simplifications.
///
/// This folds constants with arithmetic to form `_imm` instructions, and other
/// minor simplifications:
///
/// - `binop(x, iconst(k))` becomes `binop_imm(x, k)` for the opcodes listed
///   below; `isub(x, iconst(k))` becomes `iadd_imm(x, -k)`.
/// - `isub(iconst(k), x)` becomes `irsub_imm(x, k)`.
/// - `icmp(x, iconst(k))` becomes `icmp_imm(x, k)`.
/// - A `bint` feeding the first argument of a conditional trap, branch, or
///   `select` is bypassed so the boolean is used directly.
fn simplify(pos: &mut FuncCursor, inst: Inst) {
    match pos.func.dfg[inst] {
        InstructionData::Binary { opcode, args } => {
            // Is the second operand produced by an `iconst`?
            if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
                if let InstructionData::UnaryImm {
                    opcode: Opcode::Iconst,
                    mut imm,
                } = pos.func.dfg[iconst_inst]
                {
                    let new_opcode = match opcode {
                        Opcode::Iadd => Opcode::IaddImm,
                        Opcode::Imul => Opcode::ImulImm,
                        Opcode::Sdiv => Opcode::SdivImm,
                        Opcode::Udiv => Opcode::UdivImm,
                        Opcode::Srem => Opcode::SremImm,
                        Opcode::Urem => Opcode::UremImm,
                        Opcode::Band => Opcode::BandImm,
                        Opcode::Bor => Opcode::BorImm,
                        Opcode::Bxor => Opcode::BxorImm,
                        Opcode::Rotl => Opcode::RotlImm,
                        Opcode::Rotr => Opcode::RotrImm,
                        Opcode::Ishl => Opcode::IshlImm,
                        Opcode::Ushr => Opcode::UshrImm,
                        Opcode::Sshr => Opcode::SshrImm,
                        Opcode::Isub => {
                            // There is no `isub_imm`; use `iadd_imm` with the
                            // negated immediate instead.
                            imm = imm.wrapping_neg();
                            Opcode::IaddImm
                        }
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    pos.func.dfg.replace(inst).BinaryImm(
                        new_opcode,
                        ty,
                        imm,
                        args[0],
                    );
                }
            } else if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[0]) {
                // Otherwise: is the *first* operand an `iconst`? Only `isub`
                // has a reversed-immediate form.
                if let InstructionData::UnaryImm {
                    opcode: Opcode::Iconst,
                    imm,
                } = pos.func.dfg[iconst_inst]
                {
                    let new_opcode = match opcode {
                        Opcode::Isub => Opcode::IrsubImm,
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    pos.func.dfg.replace(inst).BinaryImm(
                        new_opcode,
                        ty,
                        imm,
                        args[1],
                    );
                }
            }
        }
        InstructionData::IntCompare { opcode, cond, args } => {
            debug_assert_eq!(opcode, Opcode::Icmp);
            // Fold `icmp(x, iconst(k))` into `icmp_imm(x, k)`.
            if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
                if let InstructionData::UnaryImm {
                    opcode: Opcode::Iconst,
                    imm,
                } = pos.func.dfg[iconst_inst]
                {
                    pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
                }
            }
        }
        InstructionData::CondTrap { .. } |
        InstructionData::Branch { .. } |
        InstructionData::Ternary { opcode: Opcode::Select, .. } => {
            // Fold away a redundant `bint`.
            // First inspect the defining instruction of the condition operand
            // in a separate scope, since we need a fresh (mutable) borrow of
            // the DFG afterwards to rewrite the argument.
            let maybe = {
                let args = pos.func.dfg.inst_args(inst);
                if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
                    if let InstructionData::Unary {
                        opcode: Opcode::Bint,
                        arg: bool_val,
                    } = pos.func.dfg[def_inst]
                    {
                        Some(bool_val)
                    } else {
                        None
                    }
                } else {
                    None
                }
            };
            if let Some(bool_val) = maybe {
                // Use the original boolean directly, skipping the `bint`.
                let args = pos.func.dfg.inst_args_mut(inst);
                args[0] = bool_val;
            }
        }
        _ => {}
    }
}
|
||||
|
||||
/// The main pre-opt pass.
|
||||
pub fn do_preopt(func: &mut Function) {
|
||||
let _tt = timing::preopt();
|
||||
let mut pos = FuncCursor::new(func);
|
||||
while let Some(_ebb) = pos.next_ebb() {
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
// Apply basic simplifications.
|
||||
simplify(&mut pos, inst);
|
||||
|
||||
//-- BEGIN -- division by constants ----------------
|
||||
|
||||
let mb_dri = get_div_info(inst, &pos.func.dfg);
|
||||
if let Some(divrem_info) = mb_dri {
|
||||
do_divrem_transformation(&divrem_info, &mut pos, inst);
|
||||
continue;
|
||||
}
|
||||
|
||||
//-- END -- division by constants ------------------
|
||||
}
|
||||
}
|
||||
}
|
||||
34
lib/codegen/src/print_errors.rs
Normal file
34
lib/codegen/src/print_errors.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
//! Utility routines for pretty-printing error messages.
|
||||
|
||||
use ir;
|
||||
use isa::TargetIsa;
|
||||
use result::CtonError;
|
||||
use std::fmt::Write;
|
||||
use std::string::{String, ToString};
|
||||
use verifier;
|
||||
|
||||
/// Pretty-print a verifier error.
|
||||
pub fn pretty_verifier_error(
|
||||
func: &ir::Function,
|
||||
isa: Option<&TargetIsa>,
|
||||
err: &verifier::Error,
|
||||
) -> String {
|
||||
let mut msg = err.to_string();
|
||||
match err.location {
|
||||
ir::entities::AnyEntity::Inst(inst) => {
|
||||
write!(msg, "\n{}: {}\n\n", inst, func.dfg.display_inst(inst, isa)).unwrap()
|
||||
}
|
||||
_ => msg.push('\n'),
|
||||
}
|
||||
write!(msg, "{}", func.display(isa)).unwrap();
|
||||
msg
|
||||
}
|
||||
|
||||
/// Pretty-print a Cretonne error.
|
||||
pub fn pretty_error(func: &ir::Function, isa: Option<&TargetIsa>, err: CtonError) -> String {
|
||||
if let CtonError::Verifier(e) = err {
|
||||
pretty_verifier_error(func, isa, &e)
|
||||
} else {
|
||||
err.to_string()
|
||||
}
|
||||
}
|
||||
18
lib/codegen/src/ref_slice.rs
Normal file
18
lib/codegen/src/ref_slice.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
//! Functions for converting a reference into a singleton slice.
|
||||
//!
|
||||
//! See also the [`ref_slice` crate](https://crates.io/crates/ref_slice).
|
||||
//!
|
||||
//! We define the functions here to avoid external dependencies, and to ensure that they are
|
||||
//! inlined in this crate.
|
||||
//!
|
||||
//! Despite their using an unsafe block, these functions are completely safe.
|
||||
|
||||
use std::slice;
|
||||
|
||||
/// Convert a reference into a slice containing exactly that one element.
///
/// This is the safe standard-library equivalent of the previous hand-rolled
/// `slice::from_raw_parts` call, so no `unsafe` block is needed.
pub fn ref_slice<T>(s: &T) -> &[T] {
    slice::from_ref(s)
}
|
||||
|
||||
/// Convert a mutable reference into a mutable slice containing exactly that
/// one element.
///
/// This is the safe standard-library equivalent of the previous hand-rolled
/// `slice::from_raw_parts_mut` call, so no `unsafe` block is needed.
pub fn ref_slice_mut<T>(s: &mut T) -> &mut [T] {
    slice::from_mut(s)
}
|
||||
131
lib/codegen/src/regalloc/affinity.rs
Normal file
131
lib/codegen/src/regalloc/affinity.rs
Normal file
@@ -0,0 +1,131 @@
|
||||
//! Value affinity for register allocation.
|
||||
//!
|
||||
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
|
||||
//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy
|
||||
//! instruction operand constraints.
|
||||
//!
|
||||
//! For values that want to be in registers, the affinity hint includes a register class or
|
||||
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
|
||||
//! larger register class instead.
|
||||
|
||||
use ir::{AbiParam, ArgumentLoc};
|
||||
use isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
|
||||
use std::fmt;
|
||||
|
||||
/// Preferred register allocation for an SSA value.
///
/// This is a hint only; the register allocator may deviate from it (see the
/// module documentation).
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
    /// No affinity.
    ///
    /// This indicates a value that is not defined or used by any real instructions. It is a ghost
    /// value that won't appear in the final program.
    None,

    /// This value should be placed in a spill slot on the stack.
    Stack,

    /// This value prefers a register from the given register class.
    /// The allocator may still pick from a larger class.
    Reg(RegClassIndex),
}
|
||||
|
||||
impl Default for Affinity {
    /// The default affinity is `None`: no register or stack preference at all.
    fn default() -> Self {
        Affinity::None
    }
}
|
||||
|
||||
impl Affinity {
|
||||
/// Create an affinity that satisfies a single constraint.
|
||||
///
|
||||
/// This will never create an `Affinity::None`.
|
||||
/// Use the `Default` implementation for that.
|
||||
pub fn new(constraint: &OperandConstraint) -> Affinity {
|
||||
if constraint.kind == ConstraintKind::Stack {
|
||||
Affinity::Stack
|
||||
} else {
|
||||
Affinity::Reg(constraint.regclass.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an affinity that matches an ABI argument for `isa`.
|
||||
pub fn abi(arg: &AbiParam, isa: &TargetIsa) -> Affinity {
|
||||
match arg.location {
|
||||
ArgumentLoc::Unassigned => Affinity::None,
|
||||
ArgumentLoc::Reg(_) => Affinity::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
|
||||
ArgumentLoc::Stack(_) => Affinity::Stack,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `None` affinity?
|
||||
pub fn is_none(self) -> bool {
|
||||
match self {
|
||||
Affinity::None => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `Reg` affinity?
|
||||
pub fn is_reg(self) -> bool {
|
||||
match self {
|
||||
Affinity::Reg(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `Stack` affinity?
|
||||
pub fn is_stack(self) -> bool {
|
||||
match self {
|
||||
Affinity::Stack => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge an operand constraint into this affinity.
|
||||
///
|
||||
/// Note that this does not guarantee that the register allocator will pick a register that
|
||||
/// satisfies the constraint.
|
||||
pub fn merge(&mut self, constraint: &OperandConstraint, reg_info: &RegInfo) {
|
||||
match *self {
|
||||
Affinity::None => *self = Affinity::new(constraint),
|
||||
Affinity::Reg(rc) => {
|
||||
// If the preferred register class is a subclass of the constraint, there's no need
|
||||
// to change anything.
|
||||
if constraint.kind != ConstraintKind::Stack &&
|
||||
!constraint.regclass.has_subclass(rc)
|
||||
{
|
||||
// If the register classes don't overlap, `intersect` returns `None`, and we
|
||||
// just keep our previous affinity.
|
||||
if let Some(subclass) = constraint.regclass.intersect_index(reg_info.rc(rc)) {
|
||||
// This constraint shrinks our preferred register class.
|
||||
*self = Affinity::Reg(subclass);
|
||||
}
|
||||
}
|
||||
}
|
||||
Affinity::Stack => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this value affinity, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
|
||||
DisplayAffinity(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
|
||||
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayAffinity<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0 {
|
||||
Affinity::None => write!(f, "none"),
|
||||
Affinity::Stack => write!(f, "stack"),
|
||||
Affinity::Reg(rci) => {
|
||||
match self.1 {
|
||||
Some(regs) => write!(f, "{}", regs.rc(rci)),
|
||||
None => write!(f, "{}", rci),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1123
lib/codegen/src/regalloc/coalescing.rs
Normal file
1123
lib/codegen/src/regalloc/coalescing.rs
Normal file
File diff suppressed because it is too large
Load Diff
1143
lib/codegen/src/regalloc/coloring.rs
Normal file
1143
lib/codegen/src/regalloc/coloring.rs
Normal file
File diff suppressed because it is too large
Load Diff
159
lib/codegen/src/regalloc/context.rs
Normal file
159
lib/codegen/src/regalloc/context.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
//! Register allocator context.
|
||||
//!
|
||||
//! The `Context` struct contains data structures that should be preserved across invocations of
|
||||
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
|
||||
//! avoids allocating data structures independently for each function being compiled.
|
||||
|
||||
use dominator_tree::DominatorTree;
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::Function;
|
||||
use isa::TargetIsa;
|
||||
use regalloc::coalescing::Coalescing;
|
||||
use regalloc::coloring::Coloring;
|
||||
use regalloc::live_value_tracker::LiveValueTracker;
|
||||
use regalloc::liveness::Liveness;
|
||||
use regalloc::reload::Reload;
|
||||
use regalloc::spilling::Spilling;
|
||||
use regalloc::virtregs::VirtRegs;
|
||||
use result::CtonResult;
|
||||
use timing;
|
||||
use topo_order::TopoOrder;
|
||||
use verifier::{verify_context, verify_cssa, verify_liveness, verify_locations};
|
||||
|
||||
/// Persistent memory allocations for register allocation.
pub struct Context {
    // Liveness analysis results; recomputed per function.
    liveness: Liveness,
    // Virtual registers created during coalescing.
    virtregs: VirtRegs,
    // Pass state for conversion to Conventional SSA form.
    coalescing: Coalescing,
    // Topological ordering of EBBs, shared by spilling and reload.
    topo: TopoOrder,
    // Live value tracker; its dominator live sets are reused between the
    // spilling and coloring phases.
    tracker: LiveValueTracker,
    // Pass state for the spilling phase.
    spilling: Spilling,
    // Pass state for the reload phase.
    reload: Reload,
    // Pass state for the final coloring phase.
    coloring: Coloring,
}
|
||||
|
||||
impl Context {
    /// Create a new context for register allocation.
    ///
    /// This context should be reused for multiple functions in order to avoid repeated memory
    /// allocations.
    pub fn new() -> Self {
        Self {
            liveness: Liveness::new(),
            virtregs: VirtRegs::new(),
            coalescing: Coalescing::new(),
            topo: TopoOrder::new(),
            tracker: LiveValueTracker::new(),
            spilling: Spilling::new(),
            reload: Reload::new(),
            coloring: Coloring::new(),
        }
    }

    /// Clear all data structures in this context.
    pub fn clear(&mut self) {
        self.liveness.clear();
        self.virtregs.clear();
        self.coalescing.clear();
        self.topo.clear();
        self.tracker.clear();
        self.spilling.clear();
        self.reload.clear();
        self.coloring.clear();
    }

    /// Allocate registers in `func`.
    ///
    /// After register allocation, all values in `func` have been assigned to a register or stack
    /// location that is consistent with instruction encoding constraints.
    ///
    /// Runs the passes in order — liveness, coalescing/CSSA, spilling,
    /// reload, coloring — and, when the ISA's verifier flag is enabled,
    /// re-verifies the function after each transforming pass.
    pub fn run(
        &mut self,
        isa: &TargetIsa,
        func: &mut Function,
        cfg: &ControlFlowGraph,
        domtree: &mut DominatorTree,
    ) -> CtonResult {
        let _tt = timing::regalloc();
        debug_assert!(domtree.is_valid());

        // `Liveness` and `Coloring` are self-clearing.
        self.virtregs.clear();

        // Tracker state (dominator live sets) is actually reused between the spilling and coloring
        // phases.
        self.tracker.clear();

        // Pass: Liveness analysis.
        self.liveness.compute(isa, func, cfg);

        if isa.flags().enable_verifier() {
            verify_liveness(isa, func, cfg, &self.liveness)?;
        }

        // Pass: Coalesce and create Conventional SSA form.
        self.coalescing.conventional_ssa(
            isa,
            func,
            cfg,
            domtree,
            &mut self.liveness,
            &mut self.virtregs,
        );

        if isa.flags().enable_verifier() {
            verify_context(func, cfg, domtree, isa)?;
            verify_liveness(isa, func, cfg, &self.liveness)?;
            verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
        }

        // Pass: Spilling.
        self.spilling.run(
            isa,
            func,
            domtree,
            &mut self.liveness,
            &self.virtregs,
            &mut self.topo,
            &mut self.tracker,
        );

        if isa.flags().enable_verifier() {
            verify_context(func, cfg, domtree, isa)?;
            verify_liveness(isa, func, cfg, &self.liveness)?;
            verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
        }

        // Pass: Reload.
        self.reload.run(
            isa,
            func,
            domtree,
            &mut self.liveness,
            &mut self.topo,
            &mut self.tracker,
        );

        if isa.flags().enable_verifier() {
            verify_context(func, cfg, domtree, isa)?;
            verify_liveness(isa, func, cfg, &self.liveness)?;
            verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
        }

        // Pass: Coloring.
        self.coloring.run(
            isa,
            func,
            domtree,
            &mut self.liveness,
            &mut self.tracker,
        );

        if isa.flags().enable_verifier() {
            verify_context(func, cfg, domtree, isa)?;
            verify_liveness(isa, func, cfg, &self.liveness)?;
            // Locations are only checkable after coloring has assigned them.
            verify_locations(isa, func, Some(&self.liveness))?;
            verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
        }
        Ok(())
    }
}
|
||||
215
lib/codegen/src/regalloc/diversion.rs
Normal file
215
lib/codegen/src/regalloc/diversion.rs
Normal file
@@ -0,0 +1,215 @@
|
||||
//! Register diversions.
|
||||
//!
|
||||
//! Normally, a value is assigned to a single register or stack location by the register allocator.
|
||||
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
|
||||
//! instruction constraints.
|
||||
//!
|
||||
//! These register diversions are local to an EBB. No values can be diverted when entering a new
|
||||
//! EBB.
|
||||
|
||||
use ir::{InstructionData, Opcode};
|
||||
use ir::{StackSlot, Value, ValueLoc, ValueLocations};
|
||||
use isa::{RegInfo, RegUnit};
|
||||
use std::fmt;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// A diversion of a value from its original location to a new register or stack location.
///
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
/// same value.
///
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
/// the current one. Diversions are local to a single EBB.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Diversion {
    /// The value that is diverted.
    pub value: Value,
    /// The original value location.
    pub from: ValueLoc,
    /// The current value location.
    pub to: ValueLoc,
}
|
||||
|
||||
impl Diversion {
    /// Make a new diversion.
    ///
    /// Both locations must be assigned (`ValueLoc::is_assigned`); this is
    /// checked in debug builds only.
    pub fn new(value: Value, from: ValueLoc, to: ValueLoc) -> Diversion {
        debug_assert!(from.is_assigned() && to.is_assigned());
        Diversion { value, from, to }
    }
}
|
||||
|
||||
/// Keep track of diversions in an EBB.
pub struct RegDiversions {
    // Active diversions, at most one entry per diverted value.
    // Lookups are linear scans of this vector.
    current: Vec<Diversion>,
}
|
||||
|
||||
impl RegDiversions {
|
||||
/// Create a new empty diversion tracker.
|
||||
pub fn new() -> Self {
|
||||
Self { current: Vec::new() }
|
||||
}
|
||||
|
||||
/// Clear the tracker, preparing for a new EBB.
|
||||
pub fn clear(&mut self) {
|
||||
self.current.clear()
|
||||
}
|
||||
|
||||
/// Are there any diversions?
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.current.is_empty()
|
||||
}
|
||||
|
||||
/// Get the current diversion of `value`, if any.
|
||||
pub fn diversion(&self, value: Value) -> Option<&Diversion> {
|
||||
self.current.iter().find(|d| d.value == value)
|
||||
}
|
||||
|
||||
/// Get all current diversions.
|
||||
pub fn all(&self) -> &[Diversion] {
|
||||
self.current.as_slice()
|
||||
}
|
||||
|
||||
/// Get the current location for `value`. Fall back to the assignment map for non-diverted
|
||||
/// values
|
||||
pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
|
||||
match self.diversion(value) {
|
||||
Some(d) => d.to,
|
||||
None => locations[value],
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current register location for `value`, or panic if `value` isn't in a register.
|
||||
pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
|
||||
self.get(value, locations).unwrap_reg()
|
||||
}
|
||||
|
||||
/// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
|
||||
pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
|
||||
self.get(value, locations).unwrap_stack()
|
||||
}
|
||||
|
||||
/// Record any kind of move.
|
||||
///
|
||||
/// The `from` location must match an existing `to` location, if any.
|
||||
pub fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
|
||||
debug_assert!(from.is_assigned() && to.is_assigned());
|
||||
if let Some(i) = self.current.iter().position(|d| d.value == value) {
|
||||
debug_assert_eq!(self.current[i].to, from, "Bad regmove chain for {}", value);
|
||||
if self.current[i].from != to {
|
||||
self.current[i].to = to;
|
||||
} else {
|
||||
self.current.swap_remove(i);
|
||||
}
|
||||
} else {
|
||||
self.current.push(Diversion::new(value, from, to));
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a register -> register move.
|
||||
pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
|
||||
self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
|
||||
}
|
||||
|
||||
/// Record a register -> stack move.
|
||||
pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
|
||||
self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
|
||||
}
|
||||
|
||||
/// Record a stack -> register move.
|
||||
pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
|
||||
self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
|
||||
}
|
||||
|
||||
/// Apply the effect of `inst`.
|
||||
///
|
||||
/// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
|
||||
/// match.
|
||||
pub fn apply(&mut self, inst: &InstructionData) {
|
||||
match *inst {
|
||||
InstructionData::RegMove {
|
||||
opcode: Opcode::Regmove,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regmove(arg, src, dst),
|
||||
InstructionData::RegSpill {
|
||||
opcode: Opcode::Regspill,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regspill(arg, src, dst),
|
||||
InstructionData::RegFill {
|
||||
opcode: Opcode::Regfill,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regfill(arg, src, dst),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Drop any recorded move for `value`.
|
||||
///
|
||||
/// Returns the `to` location of the removed diversion.
|
||||
pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
|
||||
self.current.iter().position(|d| d.value == value).map(
|
||||
|i| {
|
||||
self.current.swap_remove(i).to
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Return an object that can display the diversions.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
|
||||
DisplayDiversions(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Object that displays register diversions.
///
/// Created by `RegDiversions::display()`. The optional `RegInfo` is passed through to the
/// `display()` calls on each diversion's locations when formatting.
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayDiversions<'a> {
    /// Format the diversions as `{ v1: loc -> loc v2: loc -> loc }`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("{")?;
        for d in self.0.all() {
            let from = d.from.display(self.1);
            let to = d.to.display(self.1);
            write!(f, " {}: {} -> {}", d.value, from, to)?;
        }
        f.write_str(" }")
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use entity::EntityRef;
    use ir::Value;

    #[test]
    fn inserts() {
        let mut divs = RegDiversions::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);

        // A fresh move is recorded as a new diversion.
        divs.regmove(v1, 10, 12);
        let expected = Diversion {
            value: v1,
            from: ValueLoc::Reg(10),
            to: ValueLoc::Reg(12),
        };
        assert_eq!(divs.diversion(v1), Some(&expected));
        assert_eq!(divs.diversion(v2), None);

        // A chained move updates the existing diversion in place.
        divs.regmove(v1, 12, 11);
        assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));

        // Moving back to the original register cancels the diversion.
        divs.regmove(v1, 11, 10);
        assert_eq!(divs.diversion(v1), None);
    }
}
|
||||
348
lib/codegen/src/regalloc/live_value_tracker.rs
Normal file
348
lib/codegen/src/regalloc/live_value_tracker.rs
Normal file
@@ -0,0 +1,348 @@
|
||||
//! Track which values are live in an EBB with instruction granularity.
|
||||
//!
|
||||
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in an EBB.
|
||||
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
|
||||
//! instruction, starting at the EBB header.
|
||||
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::{EntityList, ListPool};
|
||||
use ir::{DataFlowGraph, Ebb, ExpandedProgramPoint, Inst, Layout, Value};
|
||||
use partition_slice::partition_slice;
|
||||
use regalloc::affinity::Affinity;
|
||||
use regalloc::liveness::Liveness;
|
||||
use regalloc::liverange::LiveRange;
|
||||
use std::collections::HashMap;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Compact list of values whose storage lives in a shared `ListPool`.
type ValueList = EntityList<Value>;
|
||||
|
||||
/// Compute and track live values throughout an EBB.
///
/// The tracker is advanced one instruction at a time via `process_inst()` / `drop_dead()`,
/// starting from `ebb_top()`.
pub struct LiveValueTracker {
    /// The set of values that are live at the current program point.
    live: LiveValueVec,

    /// Saved set of live values for every jump and branch that can potentially be an immediate
    /// dominator of an EBB.
    ///
    /// This is the set of values that are live *before* the branch.
    idom_sets: HashMap<Inst, ValueList>,

    /// Memory pool backing the `ValueList`s stored in `idom_sets`.
    idom_pool: ListPool<Value>,
}
|
||||
|
||||
/// Information about a value that is live at the current program point.
#[derive(Debug)]
pub struct LiveValue {
    /// The live value.
    pub value: Value,

    /// The local ending point of the live range in the current EBB, as returned by
    /// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
    pub endpoint: Inst,

    /// The affinity of the value as represented in its `LiveRange`.
    ///
    /// This value is simply a copy of the affinity stored in the live range. We copy it because
    /// almost all users of `LiveValue` need to look at it.
    pub affinity: Affinity,

    /// The live range for this value never leaves its EBB.
    pub is_local: bool,

    /// This value is dead - the live range ends immediately.
    pub is_dead: bool,
}
|
||||
|
||||
/// Internal vector of live values with a memoized partition point.
struct LiveValueVec {
    /// The set of values that are live at the current program point.
    values: Vec<LiveValue>,

    /// How many values at the front of `values` are known to be live after `inst`?
    ///
    /// This is used to pass a much smaller slice to `partition_slice` when its called a second
    /// time for the same instruction.
    live_prefix: Option<(Inst, usize)>,
}
|
||||
|
||||
impl LiveValueVec {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
values: Vec::new(),
|
||||
live_prefix: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a new live value to `values`. Copy some properties from `lr`.
|
||||
fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
|
||||
self.values.push(LiveValue {
|
||||
value,
|
||||
endpoint,
|
||||
affinity: lr.affinity,
|
||||
is_local: lr.is_local(),
|
||||
is_dead: lr.is_dead(),
|
||||
});
|
||||
}
|
||||
|
||||
/// Remove all elements.
|
||||
fn clear(&mut self) {
|
||||
self.values.clear();
|
||||
self.live_prefix = None;
|
||||
}
|
||||
|
||||
/// Make sure that the values killed by `next_inst` are moved to the end of the `values`
|
||||
/// vector.
|
||||
///
|
||||
/// Returns the number of values that will be live after `next_inst`.
|
||||
fn live_after(&mut self, next_inst: Inst) -> usize {
|
||||
// How many values at the front of the vector are already known to survive `next_inst`?
|
||||
// We don't need to pass this prefix to `partition_slice()`
|
||||
let keep = match self.live_prefix {
|
||||
Some((i, prefix)) if i == next_inst => prefix,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
// Move the remaining surviving values to the front partition of the vector.
|
||||
let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
|
||||
|
||||
// Remember the new prefix length in case we get called again for the same `next_inst`.
|
||||
self.live_prefix = Some((next_inst, prefix));
|
||||
prefix
|
||||
}
|
||||
|
||||
/// Remove the values killed by `next_inst`.
|
||||
fn remove_kill_values(&mut self, next_inst: Inst) {
|
||||
let keep = self.live_after(next_inst);
|
||||
self.values.truncate(keep);
|
||||
}
|
||||
|
||||
/// Remove any dead values.
|
||||
fn remove_dead_values(&mut self) {
|
||||
self.values.retain(|v| !v.is_dead);
|
||||
self.live_prefix = None;
|
||||
}
|
||||
}
|
||||
|
||||
impl LiveValueTracker {
    /// Create a new blank tracker.
    pub fn new() -> Self {
        Self {
            live: LiveValueVec::new(),
            idom_sets: HashMap::new(),
            idom_pool: ListPool::new(),
        }
    }

    /// Clear all cached information.
    pub fn clear(&mut self) {
        self.live.clear();
        self.idom_sets.clear();
        self.idom_pool.clear();
    }

    /// Get the set of currently live values.
    ///
    /// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
    /// defined by the current instruction.
    pub fn live(&self) -> &[LiveValue] {
        &self.live.values
    }

    /// Get a mutable set of currently live values.
    ///
    /// Use with care and don't move entries around.
    pub fn live_mut(&mut self) -> &mut [LiveValue] {
        &mut self.live.values
    }

    /// Move the current position to the top of `ebb`.
    ///
    /// This depends on the stored live value set at `ebb`'s immediate dominator, so that must have
    /// been visited first.
    ///
    /// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
    /// from the immediate dominator. The second slice is the set of `ebb` parameters.
    ///
    /// Dead parameters with no uses are included in `args`. Call `drop_dead_args()` to remove them.
    pub fn ebb_top(
        &mut self,
        ebb: Ebb,
        dfg: &DataFlowGraph,
        liveness: &Liveness,
        layout: &Layout,
        domtree: &DominatorTree,
    ) -> (&[LiveValue], &[LiveValue]) {
        // Start over, compute the set of live values at the top of the EBB from two sources:
        //
        // 1. Values that were live before `ebb`'s immediate dominator, filtered for those that are
        //    actually live-in.
        // 2. Arguments to `ebb` that are not dead.
        //
        self.live.clear();

        // Compute the live-in values. Start by filtering the set of values that were live before
        // the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
        // the entry block or an unreachable block).
        if let Some(idom) = domtree.idom(ebb) {
            // If the immediate dominator exits, we must have a stored list for it. This is a
            // requirement to the order EBBs are visited: All dominators must have been processed
            // before the current EBB.
            let idom_live_list = self.idom_sets.get(&idom).expect(
                "No stored live set for dominator",
            );
            let ctx = liveness.context(layout);
            // Get just the values that are live-in to `ebb`.
            for &value in idom_live_list.as_slice(&self.idom_pool) {
                let lr = liveness.get(value).expect(
                    "Immediate dominator value has no live range",
                );

                // Check if this value is live-in here.
                if let Some(endpoint) = lr.livein_local_end(ebb, ctx) {
                    self.live.push(value, endpoint, lr);
                }
            }
        }

        // Now add all the live parameters to `ebb`.
        // `first_arg` marks the boundary between live-ins and parameters in `self.live.values`.
        let first_arg = self.live.values.len();
        for &value in dfg.ebb_params(ebb) {
            let lr = &liveness[value];
            debug_assert_eq!(lr.def(), ebb.into());
            match lr.def_local_end().into() {
                ExpandedProgramPoint::Inst(endpoint) => {
                    self.live.push(value, endpoint, lr);
                }
                ExpandedProgramPoint::Ebb(local_ebb) => {
                    // This is a dead EBB parameter which is not even live into the first
                    // instruction in the EBB.
                    debug_assert_eq!(
                        local_ebb,
                        ebb,
                        "EBB parameter live range ends at wrong EBB header"
                    );
                    // Give this value a fake endpoint that is the first instruction in the EBB.
                    // We expect it to be removed by calling `drop_dead_args()`.
                    self.live.push(
                        value,
                        layout.first_inst(ebb).expect("Empty EBB"),
                        lr,
                    );
                }
            }
        }

        self.live.values.split_at(first_arg)
    }

    /// Prepare to move past `inst`.
    ///
    /// Determine the set of already live values that are killed by `inst`, and add the new defined
    /// values to the tracked set.
    ///
    /// Returns `(throughs, kills, defs)` as a tuple of slices:
    ///
    /// 1. The `throughs` slice is the set of live-through values that are neither defined nor
    ///    killed by the instruction.
    /// 2. The `kills` slice is the set of values that were live before the instruction and are
    ///    killed at the instruction. This does not include dead defs.
    /// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
    ///    dead defines.
    ///
    /// The order of `throughs` and `kills` is arbitrary.
    ///
    /// The `drop_dead()` method must be called next to actually remove the dead values from the
    /// tracked set after the two returned slices are no longer needed.
    pub fn process_inst(
        &mut self,
        inst: Inst,
        dfg: &DataFlowGraph,
        liveness: &Liveness,
    ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
        // Save a copy of the live values before any branches or jumps that could be somebody's
        // immediate dominator.
        if dfg[inst].opcode().is_branch() {
            self.save_idom_live_set(inst);
        }

        // Move killed values to the end of the vector.
        // Don't remove them yet, `drop_dead()` will do that.
        let first_kill = self.live.live_after(inst);

        // Add the values defined by `inst`.
        let first_def = self.live.values.len();
        for &value in dfg.inst_results(inst) {
            let lr = &liveness[value];
            debug_assert_eq!(lr.def(), inst.into());
            match lr.def_local_end().into() {
                ExpandedProgramPoint::Inst(endpoint) => {
                    self.live.push(value, endpoint, lr);
                }
                ExpandedProgramPoint::Ebb(ebb) => {
                    panic!("Instruction result live range can't end at {}", ebb);
                }
            }
        }

        // Partition the vector into (throughs, kills, defs) by the two recorded indices.
        (
            &self.live.values[0..first_kill],
            &self.live.values[first_kill..first_def],
            &self.live.values[first_def..],
        )
    }

    /// Prepare to move past a ghost instruction.
    ///
    /// This is like `process_inst`, except any defs are ignored.
    ///
    /// Returns `(throughs, kills)`.
    pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
        let first_kill = self.live.live_after(inst);
        self.live.values.as_slice().split_at(first_kill)
    }

    /// Drop the values that are now dead after moving past `inst`.
    ///
    /// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
    ///
    /// This must be called after `process_inst(inst)` and before proceeding to the next
    /// instruction.
    pub fn drop_dead(&mut self, inst: Inst) {
        // Remove both live values that were killed by `inst` and dead defines from `inst`.
        self.live.remove_kill_values(inst);
    }

    /// Drop any values that are marked as `is_dead`.
    ///
    /// Use this after calling `ebb_top` to clean out dead EBB parameters.
    pub fn drop_dead_params(&mut self) {
        self.live.remove_dead_values();
    }

    /// Process new spills.
    ///
    /// Any values where `f` returns true are spilled and will be treated as if their affinity was
    /// `Stack`.
    pub fn process_spills<F>(&mut self, mut f: F)
    where
        F: FnMut(Value) -> bool,
    {
        for lv in &mut self.live.values {
            if f(lv.value) {
                lv.affinity = Affinity::Stack;
            }
        }
    }

    /// Save the current set of live values so it is associated with `idom`.
    fn save_idom_live_set(&mut self, idom: Inst) {
        let values = self.live.values.iter().map(|lv| lv.value);
        let pool = &mut self.idom_pool;
        // If there already is a set saved for `idom`, just keep it.
        self.idom_sets.entry(idom).or_insert_with(|| {
            let mut list = ValueList::default();
            list.extend(values, pool);
            list
        });
    }
}
|
||||
458
lib/codegen/src/regalloc/liveness.rs
Normal file
458
lib/codegen/src/regalloc/liveness.rs
Normal file
@@ -0,0 +1,458 @@
|
||||
//! Liveness analysis for SSA values.
|
||||
//!
|
||||
//! This module computes the live range of all the SSA values in a function and produces a
|
||||
//! `LiveRange` instance for each.
|
||||
//!
|
||||
//!
|
||||
//! # Liveness consumers
|
||||
//!
|
||||
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
|
||||
//! EBB and assigns a register to the defined values. This algorithm needs to maintain a set of the
|
||||
//! currently live values as it is iterating down the instructions in the EBB. It asks the
|
||||
//! following questions:
|
||||
//!
|
||||
//! - What is the set of live values at the entry to the EBB?
|
||||
//! - When moving past a use of a value, is that value still alive in the EBB, or was that the last
|
||||
//! use?
|
||||
//! - When moving past a branch, which of the live values are still live below the branch?
|
||||
//!
|
||||
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
|
||||
//! `livein_local_end` queries. The coloring algorithm visits EBBs in a topological order of the
|
||||
//! dominator tree, so it can compute the set of live values at the beginning of an EBB by starting
|
||||
//! from the set of live values at the dominating branch instruction and filtering it with
|
||||
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
|
||||
//!
|
||||
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
|
||||
//! number of live values at every program point and insert spill code until the number of
|
||||
//! registers needed is small enough.
|
||||
//!
|
||||
//!
|
||||
//! # Alternative algorithms
|
||||
//!
|
||||
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
|
||||
//! alternatives.
|
||||
//!
|
||||
//! ## Data-flow equations
|
||||
//!
|
||||
//! The classic *live variables analysis* that you will find in all compiler books from the
|
||||
//! previous century does not depend on SSA form. It is typically implemented by iteratively
|
||||
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
|
||||
//! variables for every basic block in the program.
|
||||
//!
|
||||
//! This algorithm has some disadvantages that makes us look elsewhere:
|
||||
//!
|
||||
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
|
||||
//! - Sparse representation. In practice, the majority of SSA values never leave their basic block,
|
||||
//! and those that do span basic blocks rarely span a large number of basic blocks. This makes
|
||||
//! the bit-vectors quite sparse.
|
||||
//! - Traditionally, the data-flow equations were solved for real program *variables* which does
|
||||
//! not include temporaries used in evaluating expressions. We have an SSA form program which
|
||||
//! blurs the distinction between temporaries and variables. This makes the quadratic memory
|
||||
//! problem worse because there are many more SSA values than there was variables in the original
|
||||
//! program, and we don't know a priori which SSA values leave their basic block.
|
||||
//! - Missing last-use information. For values that are not live-out of a basic block, we would
|
||||
//! need to store information about the last use in the block somewhere. LLVM stores this
|
||||
//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
|
||||
//! source of problems for LLVM's register allocator.
|
||||
//!
|
||||
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
|
||||
//! multiple definitions of the same variable. We don't need this generality since we already have
|
||||
//! a program in SSA form.
|
||||
//!
|
||||
//! ## LLVM's liveness analysis
|
||||
//!
|
||||
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
|
||||
//! a disjoint union of related SSA values that should be assigned to the same physical register.
|
||||
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
|
||||
//! that Cretonne's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
|
||||
//! describes the live range of a virtual register *and* which one of the related SSA values is
|
||||
//! live at any given program point.
|
||||
//!
|
||||
//! LLVM computes the live range of each virtual register independently by using the use-def chains
|
||||
//! that are baked into its IR. The algorithm for a single virtual register is:
|
||||
//!
|
||||
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
|
||||
//! the def-chain. This does not include any phi-values.
|
||||
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
|
||||
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
|
||||
//! that already contain some liveness and extend the last live SSA value in the block to be
|
||||
//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
|
||||
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
|
||||
//! PHI values to be created when different SSA values can reach the same block.
|
||||
//!
|
||||
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
|
||||
//! one SSA value.
|
||||
//!
|
||||
//! This algorithm has some advantages compared to the data-flow equations:
|
||||
//!
|
||||
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
|
||||
//! the CFG. The memory needed to store these live ranges is independent of the number of basic
|
||||
//! blocks in the program.
|
||||
//! - The time to compute the live range of a global virtual register is proportional to the number
|
||||
//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
|
||||
//! functions.
|
||||
//! - A single live range can be recomputed after making modifications to the IR. No global
|
||||
//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
|
||||
//! which Cretonne doesn't.
|
||||
//!
|
||||
//! Cretonne uses a very similar data structures and algorithms to LLVM, with the important
|
||||
//! difference that live ranges are computed per SSA value instead of per virtual register, and the
|
||||
//! uses in Cretonne IR refers to SSA values instead of virtual registers. This means that Cretonne
|
||||
//! can skip the last step of reconstructing SSA form for the virtual register uses.
|
||||
//!
|
||||
//! ## Fast Liveness Checking for SSA-Form Programs
|
||||
//!
|
||||
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
|
||||
//! was presented at CGO 2008:
|
||||
//!
|
||||
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
|
||||
//! Checking for SSA-Form Programs.* CGO.
|
||||
//!
|
||||
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
|
||||
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
|
||||
//! chain of the value and performs lookups in the precomputed bit-vectors.
|
||||
//!
|
||||
//! I did not seriously consider this analysis for Cretonne because:
|
||||
//!
|
||||
//! - It depends critically on use chains which Cretonne doesn't have.
|
||||
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
|
||||
//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
|
||||
//! quadratic behavior in unfortunate cases.
|
||||
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
|
||||
//! based approach, which isn't that impressive.
|
||||
//!
|
||||
//! Nevertheless, the property of only depending in the CFG structure is very useful. If Cretonne
|
||||
//! gains use chains, this approach would be worth a proper evaluation.
|
||||
//!
|
||||
//!
|
||||
//! # Cretonne's liveness analysis
|
||||
//!
|
||||
//! The algorithm implemented in this module is similar to LLVM's with these differences:
|
||||
//!
|
||||
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
|
||||
//! register.
|
||||
//! - Instructions in Cretonne IR contains references to SSA values, not virtual registers.
|
||||
//! - All live ranges are computed in one traversal of the program. Cretonne doesn't have use
|
||||
//! chains, so it is not possible to compute the live range for a single SSA value independently.
|
||||
//!
|
||||
//! The liveness computation visits all instructions in the program. The order is not important for
|
||||
//! the algorithm to be correct. At each instruction, the used values are examined.
|
||||
//!
|
||||
//! - The first time a value is encountered, its live range is constructed as a dead live range
|
||||
//! containing only the defining program point.
|
||||
//! - The local interval of the value's live range is extended so it reaches the use. This may
|
||||
//! require creating a new live-in local interval for the EBB.
|
||||
//! - If the live range became live-in to the EBB, add the EBB to a work-list.
|
||||
//! - While the work-list is non-empty pop a live-in EBB and repeat the two steps above, using each
|
||||
//! of the live-in EBB's CFG predecessor instructions as a 'use'.
|
||||
//!
|
||||
//! The effect of this algorithm is to extend the live range of each to reach uses as they are
|
||||
//! visited. No data about each value beyond the live range is needed between visiting uses, so
|
||||
//! nothing is lost by computing the live range of all values simultaneously.
|
||||
//!
|
||||
//! ## Cache efficiency of Cretonne vs LLVM
|
||||
//!
|
||||
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
|
||||
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
|
||||
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
|
||||
//! somewhat chaotically.
|
||||
//!
|
||||
//! Cretonne uses a transposed algorithm, visiting instructions in order. This means that each
|
||||
//! instruction is brought into cache only once, and it is likely that the other instructions on
|
||||
//! the same cache line will be visited before the line is evicted.
|
||||
//!
|
||||
//! Cretonne's problem is that the `LiveRange` structs are visited many times and not always
|
||||
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
|
||||
//! multiple related values can live on the same cache line.
|
||||
//!
|
||||
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
|
||||
//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
|
||||
//! size to 32 bytes.
|
||||
//! - Related values should be stored on the same cache line. The current sparse set implementation
|
||||
//! does a decent job of that.
|
||||
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
|
||||
//! line. These lists are very likely to be found in L2 cache at least.
|
||||
//!
|
||||
//! There is some room for improvement.
|
||||
|
||||
use entity::SparseMap;
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::dfg::ValueDef;
|
||||
use ir::{Ebb, Function, Inst, Layout, ProgramPoint, Value};
|
||||
use isa::{EncInfo, TargetIsa};
|
||||
use regalloc::affinity::Affinity;
|
||||
use regalloc::liverange::{LiveRange, LiveRangeContext, LiveRangeForest};
|
||||
use std::mem;
|
||||
use std::ops::Index;
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// A set of live ranges, indexed by value number.
type LiveRangeSet = SparseMap<Value, LiveRange>;
|
||||
|
||||
/// Get a mutable reference to the live range for `value`.
/// Create it if necessary.
///
/// A newly created range is "dead": it covers only the defining program point. The initial
/// affinity is derived from the defining instruction's encoding constraints, the function
/// signature (for entry-block parameters), or the value's type (for other EBB parameters).
fn get_or_create<'a>(
    lrset: &'a mut LiveRangeSet,
    value: Value,
    isa: &TargetIsa,
    func: &Function,
    enc_info: &EncInfo,
) -> &'a mut LiveRange {
    // It would be better to use `get_mut()` here, but that leads to borrow checker fighting
    // which can probably only be resolved by non-lexical lifetimes.
    // https://github.com/rust-lang/rfcs/issues/811
    if lrset.get(value).is_none() {
        // Create a live range for value. We need the program point that defines it.
        let def;
        let affinity;
        match func.dfg.value_def(value) {
            ValueDef::Result(inst, rnum) => {
                def = inst.into();
                // Initialize the affinity from the defining instruction's result constraints.
                // Don't do this for call return values which are always tied to a single register.
                affinity = enc_info
                    .operand_constraints(func.encodings[inst])
                    .and_then(|rc| rc.outs.get(rnum))
                    .map(Affinity::new)
                    .or_else(|| {
                        // If this is a call, get the return value affinity.
                        func.dfg.call_signature(inst).map(|sig| {
                            Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa)
                        })
                    })
                    .unwrap_or_default();
            }
            ValueDef::Param(ebb, num) => {
                def = ebb.into();
                if func.layout.entry_block() == Some(ebb) {
                    // The affinity for entry block parameters can be inferred from the function
                    // signature.
                    affinity = Affinity::abi(&func.signature.params[num], isa);
                } else {
                    // Give normal EBB parameters a register affinity matching their type.
                    let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
                    affinity = Affinity::Reg(rc.into());
                }
            }
        };
        lrset.insert(LiveRange::new(value, def, affinity));
    }
    // The entry is guaranteed to exist now; the unwrap cannot fail.
    lrset.get_mut(value).unwrap()
}
|
||||
|
||||
/// Extend the live range for `value` so it reaches `to` which must live in `ebb`.
///
/// This performs a depth-first traversal up the CFG, marking the value live-in to every
/// predecessor block on a path from the existing live range to the new use.
fn extend_to_use(
    lr: &mut LiveRange,
    ebb: Ebb,
    to: Inst,
    worklist: &mut Vec<Ebb>,
    func: &Function,
    cfg: &ControlFlowGraph,
    forest: &mut LiveRangeForest,
) {
    // This is our scratch working space, and we'll leave it empty when we return.
    debug_assert!(worklist.is_empty());

    // Extend the range locally in `ebb`.
    // If there already was a live interval in that block, we're done.
    // (`extend_in_ebb` returning true means the value just became live-in to the block.)
    if lr.extend_in_ebb(ebb, to, &func.layout, forest) {
        worklist.push(ebb);
    }

    // The work list contains those EBBs where we have learned that the value needs to be
    // live-in.
    //
    // This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
    // CFG from the existing live range to `ebb`.
    //
    // Extend the live range as we go. The live range itself also serves as a visited set since
    // `extend_in_ebb` will never return true twice for the same EBB.
    //
    while let Some(livein) = worklist.pop() {
        // We've learned that the value needs to be live-in to the `livein` EBB.
        // Make sure it is also live at all predecessor branches to `livein`.
        for (pred, branch) in cfg.pred_iter(livein) {
            if lr.extend_in_ebb(pred, branch, &func.layout, forest) {
                // This predecessor EBB also became live-in. We need to process it later.
                worklist.push(pred);
            }
        }
    }
}
|
||||
|
||||
/// Liveness analysis for a function.
///
/// Compute a live range for every SSA value used in the function.
pub struct Liveness {
    /// The live ranges that have been computed so far, keyed by SSA value.
    ranges: LiveRangeSet,

    /// Memory pool for the live ranges.
    /// All live-in interval B-trees in `ranges` allocate their nodes from this forest.
    forest: LiveRangeForest,

    /// Working space for the `extend_to_use` algorithm.
    /// This vector is always empty, except for inside that function.
    /// It lives here to avoid repeated allocation of scratch memory.
    worklist: Vec<Ebb>,
}
|
||||
|
||||
impl Liveness {
|
||||
/// Create a new empty liveness analysis.
|
||||
///
|
||||
/// The memory allocated for this analysis can be reused for multiple functions. Use the
|
||||
/// `compute` method to actually runs the analysis for a function.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
ranges: LiveRangeSet::new(),
|
||||
forest: LiveRangeForest::new(),
|
||||
worklist: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a context needed for working with a `LiveRange`.
|
||||
pub fn context<'a>(&'a self, layout: &'a Layout) -> LiveRangeContext<'a, Layout> {
|
||||
LiveRangeContext::new(layout, &self.forest)
|
||||
}
|
||||
|
||||
/// Clear all data structures in this liveness analysis.
|
||||
pub fn clear(&mut self) {
|
||||
self.ranges.clear();
|
||||
self.forest.clear();
|
||||
self.worklist.clear();
|
||||
}
|
||||
|
||||
/// Get the live range for `value`, if it exists.
|
||||
pub fn get(&self, value: Value) -> Option<&LiveRange> {
|
||||
self.ranges.get(value)
|
||||
}
|
||||
|
||||
/// Create a new live range for `value`.
|
||||
///
|
||||
/// The new live range will be defined at `def` with no extent, like a dead value.
|
||||
///
|
||||
/// This asserts that `value` does not have an existing live range.
|
||||
pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
|
||||
where
|
||||
PP: Into<ProgramPoint>,
|
||||
{
|
||||
let old = self.ranges.insert(
|
||||
LiveRange::new(value, def.into(), affinity),
|
||||
);
|
||||
debug_assert!(old.is_none(), "{} already has a live range", value);
|
||||
}
|
||||
|
||||
/// Move the definition of `value` to `def`.
|
||||
///
|
||||
/// The old and new def points must be in the same EBB, and before the end of the live range.
|
||||
pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
|
||||
where
|
||||
PP: Into<ProgramPoint>,
|
||||
{
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
lr.move_def_locally(def.into());
|
||||
}
|
||||
|
||||
/// Locally extend the live range for `value` to reach `user`.
|
||||
///
|
||||
/// It is assumed the `value` is already live before `user` in `ebb`.
|
||||
///
|
||||
/// Returns a mutable reference to the value's affinity in case that also needs to be updated.
|
||||
pub fn extend_locally(
|
||||
&mut self,
|
||||
value: Value,
|
||||
ebb: Ebb,
|
||||
user: Inst,
|
||||
layout: &Layout,
|
||||
) -> &mut Affinity {
|
||||
debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
|
||||
debug_assert!(!livein, "{} should already be live in {}", value, ebb);
|
||||
&mut lr.affinity
|
||||
}
|
||||
|
||||
/// Change the affinity of `value` to `Stack` and return the previous affinity.
|
||||
pub fn spill(&mut self, value: Value) -> Affinity {
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
mem::replace(&mut lr.affinity, Affinity::Stack)
|
||||
}
|
||||
|
||||
/// Compute the live ranges of all SSA values used in `func`.
|
||||
/// This clears out any existing analysis stored in this data structure.
|
||||
pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
|
||||
let _tt = timing::ra_liveness();
|
||||
self.ranges.clear();
|
||||
|
||||
// Get ISA data structures used for computing live range affinities.
|
||||
let enc_info = isa.encoding_info();
|
||||
let reg_info = isa.register_info();
|
||||
|
||||
// The liveness computation needs to visit all uses, but the order doesn't matter.
|
||||
// TODO: Perhaps this traversal of the function could be combined with a dead code
|
||||
// elimination pass if we visit a post-order of the dominator tree?
|
||||
// TODO: Resolve value aliases while we're visiting instructions?
|
||||
for ebb in func.layout.ebbs() {
|
||||
// Make sure we have created live ranges for dead EBB parameters.
|
||||
// TODO: If these parameters are really dead, we could remove them, except for the
|
||||
// entry block which must match the function signature.
|
||||
for &arg in func.dfg.ebb_params(ebb) {
|
||||
get_or_create(&mut self.ranges, arg, isa, func, &enc_info);
|
||||
}
|
||||
|
||||
for inst in func.layout.ebb_insts(ebb) {
|
||||
// Eliminate all value aliases, they would confuse the register allocator.
|
||||
func.dfg.resolve_aliases_in_arguments(inst);
|
||||
|
||||
// Make sure we have created live ranges for dead defs.
|
||||
// TODO: When we implement DCE, we can use the absence of a live range to indicate
|
||||
// an unused value.
|
||||
for &def in func.dfg.inst_results(inst) {
|
||||
get_or_create(&mut self.ranges, def, isa, func, &enc_info);
|
||||
}
|
||||
|
||||
// Iterator of constraints, one per value operand.
|
||||
let encoding = func.encodings[inst];
|
||||
let mut operand_constraints = enc_info
|
||||
.operand_constraints(encoding)
|
||||
.map(|c| c.ins)
|
||||
.unwrap_or(&[])
|
||||
.iter();
|
||||
|
||||
for &arg in func.dfg.inst_args(inst) {
|
||||
// Get the live range, create it as a dead range if necessary.
|
||||
let lr = get_or_create(&mut self.ranges, arg, isa, func, &enc_info);
|
||||
|
||||
// Extend the live range to reach this use.
|
||||
extend_to_use(
|
||||
lr,
|
||||
ebb,
|
||||
inst,
|
||||
&mut self.worklist,
|
||||
func,
|
||||
cfg,
|
||||
&mut self.forest,
|
||||
);
|
||||
|
||||
// Apply operand constraint, ignoring any variable arguments after the fixed
|
||||
// operands described by `operand_constraints`. Variable arguments are either
|
||||
// EBB arguments or call/return ABI arguments.
|
||||
if let Some(constraint) = operand_constraints.next() {
|
||||
lr.affinity.merge(constraint, ®_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<Value> for Liveness {
|
||||
type Output = LiveRange;
|
||||
|
||||
fn index(&self, index: Value) -> &LiveRange {
|
||||
match self.ranges.get(index) {
|
||||
Some(lr) => lr,
|
||||
None => panic!("{} has no live range", index),
|
||||
}
|
||||
}
|
||||
}
|
||||
748
lib/codegen/src/regalloc/liverange.rs
Normal file
748
lib/codegen/src/regalloc/liverange.rs
Normal file
@@ -0,0 +1,748 @@
|
||||
//! Data structure representing the live range of an SSA value.
|
||||
//!
|
||||
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
|
||||
//! an SSA value begins where it is defined and extends to all program points where the value is
|
||||
//! still needed.
|
||||
//!
|
||||
//! # Local Live Ranges
|
||||
//!
|
||||
//! Inside a single extended basic block, the live range of a value is always an interval between
|
||||
//! two program points (if the value is live in the EBB at all). The starting point is either:
|
||||
//!
|
||||
//! 1. The instruction that defines the value, or
|
||||
//! 2. The EBB header, because the value is an argument to the EBB, or
|
||||
//! 3. The EBB header, because the value is defined in another EBB and live-in to this one.
|
||||
//!
|
||||
//! The ending point of the local live range is the last of the following program points in the
|
||||
//! EBB:
|
||||
//!
|
||||
//! 1. The last use in the EBB, where a *use* is an instruction that has the value as an argument.
|
||||
//! 2. The last branch or jump instruction in the EBB that can reach a use.
|
||||
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
|
||||
//!
|
||||
//! Note that 2. includes loop back-edges to the same EBB. In general, if a value is defined
|
||||
//! outside a loop and used inside the loop, it will be live in the entire loop.
|
||||
//!
|
||||
//! # Global Live Ranges
|
||||
//!
|
||||
//! Values that appear in more than one EBB have a *global live range* which can be seen as the
|
||||
//! disjoint union of the per-EBB local intervals for all of the EBBs where the value is live.
|
||||
//! Together with a `ProgramOrder` which provides a linear ordering of the EBBs, the global live
|
||||
//! range becomes a linear sequence of disjoint intervals, at most one per EBB.
|
||||
//!
|
||||
//! In the special case of a dead value, the global live range is a single interval where the start
|
||||
//! and end points are the same. The global live range of a value is never completely empty.
|
||||
//!
|
||||
//! # Register interference
|
||||
//!
|
||||
//! The register allocator uses live ranges to determine if values *interfere*, which means that
|
||||
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
|
||||
//! their intervals overlap.
|
||||
//!
|
||||
//! If one live range ends at an instruction that defines another live range, those two live ranges
|
||||
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
|
||||
//! register for an output value. If Cretonne gets support for inline assembly, we will need to
|
||||
//! handle *early clobbers* which are output registers that are not allowed to alias any input
|
||||
//! registers.
|
||||
//!
|
||||
//! If `i1 < i2 < i3` are program points, we have:
|
||||
//!
|
||||
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
|
||||
//! - `i1-i2` and `i2-i3` don't interfere.
|
||||
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
|
||||
//! - `i1-i2` and `i2-i2` don't interfere.
|
||||
//! - `i2-i3` and `i2-i2` do interfere.
|
||||
//!
|
||||
//! Because of this behavior around interval end points, live range interference is not completely
|
||||
//! equivalent to mathematical intersection of open or half-open intervals.
|
||||
//!
|
||||
//! # Implementation notes
|
||||
//!
|
||||
//! A few notes about the implementation of this data structure. This should not concern someone
|
||||
//! only looking to use the public interface.
|
||||
//!
|
||||
//! ## EBB ordering
|
||||
//!
|
||||
//! The relative order of EBBs is used to maintain a sorted list of live-in intervals and to
|
||||
//! coalesce adjacent live-in intervals when the prior interval covers the whole EBB. This doesn't
|
||||
//! depend on any property of the program order, so alternative orderings are possible:
|
||||
//!
|
||||
//! 1. The EBB layout order. This is what we currently use.
|
||||
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
|
||||
//! def interval.
|
||||
//! 3. A numerical order by EBB number. Performant because it doesn't need to indirect through the
|
||||
//! `ProgramOrder` for comparisons.
|
||||
//!
|
||||
//! These orderings will cause small differences in coalescing opportunities, but all of them would
|
||||
//! do a decent job of compressing a long live range. The numerical order might be preferable
|
||||
//! because:
|
||||
//!
|
||||
//! - It has better performance because EBB numbers can be compared directly without any table
|
||||
//! lookups.
|
||||
//! - If EBB numbers are not reused, it is safe to allocate new EBBs without getting spurious
|
||||
//! live-in intervals from any coalesced representations that happen to cross a new EBB.
|
||||
//!
|
||||
//! For comparing instructions, the layout order is always what we want.
|
||||
//!
|
||||
//! ## Alternative representation
|
||||
//!
|
||||
//! Since a local live-in interval always begins at its EBB header, it is uniquely described by its
|
||||
//! end point instruction alone. We can use the layout to look up the EBB containing the end point.
|
||||
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
|
||||
//!
|
||||
//! Coalescing is an important compression technique because some live ranges can span thousands of
|
||||
//! EBBs. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
|
||||
//! an `[Ebb, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
|
||||
//! `Ebb` entry represents a single live-in interval.
|
||||
//!
|
||||
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
|
||||
//! It is more complicated to work with, though, so it is probably not worth it. The performance
|
||||
//! benefits of switching to a numerical EBB order only appears if the binary search is doing
|
||||
//! EBB-EBB comparisons.
|
||||
//!
|
||||
//! ## B-tree representation
|
||||
//!
|
||||
//! A `BTreeMap<Ebb, Inst>` could also be used for the live-in intervals. It looks like the
|
||||
//! standard library B-tree doesn't provide the necessary interface for an efficient implementation
|
||||
//! of coalescing, so we would need to roll our own.
|
||||
//!
|
||||
|
||||
use bforest;
|
||||
use entity::SparseMapValue;
|
||||
use ir::{Ebb, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
|
||||
use regalloc::affinity::Affinity;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
/// Global live range of a single SSA value.
|
||||
///
|
||||
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
|
||||
/// SSA value is the disjoint union of a set of intervals, each local to a single EBB, and with at
|
||||
/// most one interval per EBB. We further distinguish between:
|
||||
///
|
||||
/// 1. The *def interval* is the local interval in the EBB where the value is defined, and
|
||||
/// 2. The *live-in intervals* are the local intervals in the remaining EBBs.
|
||||
///
|
||||
/// A live-in interval always begins at the EBB header, while the def interval can begin at the
|
||||
/// defining instruction, or at the EBB header for an EBB argument value.
|
||||
///
|
||||
/// All values have a def interval, but a large proportion of values don't have any live-in
|
||||
/// intervals. These are called *local live ranges*.
|
||||
///
|
||||
/// # Program order requirements
|
||||
///
|
||||
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
|
||||
/// ordering instructions inside an EBB *and* for ordering EBBs. The methods that depend on the
|
||||
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
|
||||
/// ensure that the provided ordering is consistent between calls.
|
||||
///
|
||||
/// In particular, changing the order of EBBs or inserting new EBBs will invalidate live ranges.
|
||||
///
|
||||
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
|
||||
/// instructions using or defining their value, `LiveRange` structs can contain references to
|
||||
/// branch and jump instructions.
|
||||
pub type LiveRange = GenLiveRange<Layout>; // Concrete live range using the function `Layout` as the program order.
|
||||
|
||||
/// Generic live range implementation.
///
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
/// Use `LiveRange` instead of using this generic directly.
pub struct GenLiveRange<PO: ProgramOrder> {
    /// The value described by this live range.
    /// This member can't be modified in case the live range is stored in a `SparseMap`
    /// (it serves as the map key — see the `SparseMapValue` impl below).
    value: Value,

    /// The preferred register allocation for this value.
    pub affinity: Affinity,

    /// The instruction or EBB header where this value is defined.
    def_begin: ProgramPoint,

    /// The end point of the def interval. This must always belong to the same EBB as `def_begin`.
    ///
    /// We always have `def_begin <= def_end` with equality implying a dead def live range with no
    /// uses.
    def_end: ProgramPoint,

    /// Additional live-in intervals sorted in program order.
    ///
    /// This map is empty for most values which are only used in one EBB.
    ///
    /// A map entry `ebb -> inst` means that the live range is live-in to `ebb`, continuing up to
    /// `inst` which may belong to a later EBB in the program order.
    ///
    /// The entries are non-overlapping, and none of them overlap the EBB where the value is
    /// defined.
    liveins: bforest::Map<Ebb, Inst, PO>,
}
|
||||
|
||||
/// Context information needed to query a `LiveRange`.
///
/// Bundles the program order with the shared B-tree memory pool so query methods on
/// `GenLiveRange` only need a single extra argument.
pub struct LiveRangeContext<'a, PO: 'a + ProgramOrder> {
    /// Ordering of EBBs.
    pub order: &'a PO,
    /// Memory pool backing the live-in interval maps.
    pub forest: &'a bforest::MapForest<Ebb, Inst, PO>,
}
|
||||
|
||||
impl<'a, PO: ProgramOrder> LiveRangeContext<'a, PO> {
|
||||
/// Make a new context.
|
||||
pub fn new(
|
||||
order: &'a PO,
|
||||
forest: &'a bforest::MapForest<Ebb, Inst, PO>,
|
||||
) -> LiveRangeContext<'a, PO> {
|
||||
LiveRangeContext { order, forest }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, PO: ProgramOrder> Clone for LiveRangeContext<'a, PO> {
|
||||
fn clone(&self) -> Self {
|
||||
LiveRangeContext {
|
||||
order: self.order,
|
||||
forest: self.forest,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, PO: ProgramOrder> Copy for LiveRangeContext<'a, PO> {}
|
||||
|
||||
/// Forest of B-trees used for storing live ranges.
///
/// All `LiveRange`s in a function share this single memory pool for their live-in maps.
pub type LiveRangeForest = bforest::MapForest<Ebb, Inst, Layout>;
|
||||
|
||||
/// Let any `ProgramOrder` act as a B-tree key comparator, so the live-in maps stay sorted
/// in program order.
impl<PO: ProgramOrder> bforest::Comparator<Ebb> for PO {
    fn cmp(&self, a: Ebb, b: Ebb) -> Ordering {
        // Delegates to `ProgramOrder::cmp` (the generic program-point comparison).
        self.cmp(a, b)
    }
}
|
||||
|
||||
impl<PO: ProgramOrder> GenLiveRange<PO> {
    /// Create a new live range for `value` defined at `def`.
    ///
    /// The live range will be created as dead, but it can be extended with `extend_in_ebb()`.
    pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> GenLiveRange<PO> {
        GenLiveRange {
            value,
            affinity,
            // Dead def: the interval begins and ends at the defining point.
            def_begin: def,
            def_end: def,
            liveins: bforest::Map::new(),
        }
    }

    /// Extend the local interval for `ebb` so it reaches `to` which must belong to `ebb`.
    /// Create a live-in interval if necessary.
    ///
    /// If the live range already has a local interval in `ebb`, extend its end point so it
    /// includes `to`, and return false.
    ///
    /// If the live range did not previously have a local interval in `ebb`, add one so the value
    /// is live-in to `ebb`, extending to `to`. Return true.
    ///
    /// The return value can be used to detect if we just learned that the value is live-in to
    /// `ebb`. This can trigger recursive extensions in `ebb`'s CFG predecessor blocks.
    pub fn extend_in_ebb(
        &mut self,
        ebb: Ebb,
        to: Inst,
        order: &PO,
        forest: &mut bforest::MapForest<Ebb, Inst, PO>,
    ) -> bool {
        // First check if we're extending the def interval.
        //
        // We're assuming here that `to` never precedes `def_begin` in the same EBB, but we can't
        // check it without a method for getting `to`'s EBB.
        if order.cmp(ebb, self.def_end) != Ordering::Greater &&
            order.cmp(to, self.def_begin) != Ordering::Less
        {
            let to_pp = to.into();
            debug_assert_ne!(
                to_pp,
                self.def_begin,
                "Can't use value in the defining instruction."
            );
            if order.cmp(to, self.def_end) == Ordering::Greater {
                self.def_end = to_pp;
            }
            // Extending the def interval never makes the value live-in anywhere new.
            return false;
        }

        // Now check if we're extending any of the existing live-in intervals.
        let mut c = self.liveins.cursor(forest, order);
        let first_time_livein;

        if let Some(end) = c.goto(ebb) {
            // There's an interval beginning at `ebb`. See if it extends.
            first_time_livein = false;
            if order.cmp(end, to) == Ordering::Less {
                *c.value_mut().unwrap() = to;
            } else {
                // Already covered; nothing to do (and nothing to coalesce below).
                return first_time_livein;
            }
        } else if let Some((_, end)) = c.prev() {
            // There's no interval beginning at `ebb`, but we could still be live-in at `ebb` with
            // a coalesced interval that begins before and ends after.
            if order.cmp(end, ebb) == Ordering::Greater {
                // Yep, the previous interval overlaps `ebb`.
                first_time_livein = false;
                if order.cmp(end, to) == Ordering::Less {
                    *c.value_mut().unwrap() = to;
                } else {
                    return first_time_livein;
                }
            } else {
                first_time_livein = true;
                // The current interval does not overlap `ebb`, but it may still be possible to
                // coalesce with it when it ends exactly at the instruction gap before `ebb`.
                if order.is_ebb_gap(end, ebb) {
                    *c.value_mut().unwrap() = to;
                } else {
                    c.insert(ebb, to);
                }
            }
        } else {
            // There is no existing interval before `ebb`.
            first_time_livein = true;
            c.insert(ebb, to);
        }

        // Now `c` is left pointing at an interval that ends in `to`.
        debug_assert_eq!(c.value(), Some(to));

        // See if it can be coalesced with the following interval.
        if let Some((next_ebb, next_end)) = c.next() {
            if order.is_ebb_gap(to, next_ebb) {
                // Remove this interval and extend the previous end point to `next_end`.
                c.remove();
                c.prev();
                *c.value_mut().unwrap() = next_end;
            }
        }

        first_time_livein
    }

    /// Is this the live range of a dead value?
    ///
    /// A dead value has no uses, and its live range ends at the same program point where it is
    /// defined.
    pub fn is_dead(&self) -> bool {
        self.def_begin == self.def_end
    }

    /// Is this a local live range?
    ///
    /// A local live range is only used in the same EBB where it was defined. It is allowed to span
    /// multiple basic blocks within that EBB.
    pub fn is_local(&self) -> bool {
        self.liveins.is_empty()
    }

    /// Get the program point where this live range is defined.
    ///
    /// This will be an EBB header when the value is an EBB argument, otherwise it is the defining
    /// instruction.
    pub fn def(&self) -> ProgramPoint {
        self.def_begin
    }

    /// Move the definition of this value to a new program point.
    ///
    /// It is only valid to move the definition within the same EBB, and it can't be moved beyond
    /// `def_local_end()`.
    pub fn move_def_locally(&mut self, def: ProgramPoint) {
        self.def_begin = def;
    }

    /// Get the local end-point of this live range in the EBB where it is defined.
    ///
    /// This can be the EBB header itself in the case of a dead EBB argument.
    /// Otherwise, it will be the last local use or branch/jump that can reach a use.
    pub fn def_local_end(&self) -> ProgramPoint {
        self.def_end
    }

    /// Get the local end-point of this live range in an EBB where it is live-in.
    ///
    /// If this live range is not live-in to `ebb`, return `None`. Otherwise, return the end-point
    /// of this live range's local interval in `ebb`.
    ///
    /// If the live range is live through all of `ebb`, the terminator of `ebb` is a correct
    /// answer, but it is also possible that an even later program point is returned. So don't
    /// depend on the returned `Inst` to belong to `ebb`.
    pub fn livein_local_end(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> Option<Inst> {
        self.liveins
            .get_or_less(ebb, ctx.forest, ctx.order)
            .and_then(|(_, inst)| {
                // We have an entry that ends at `inst`.
                // Only an end point past `ebb`'s header means the value is live-in here.
                if ctx.order.cmp(inst, ebb) == Ordering::Greater {
                    Some(inst)
                } else {
                    None
                }
            })
    }

    /// Is this value live-in to `ebb`?
    ///
    /// An EBB argument is not considered to be live in.
    pub fn is_livein(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        self.livein_local_end(ebb, ctx).is_some()
    }

    /// Get all the live-in intervals.
    ///
    /// Note that the intervals are stored in a compressed form so each entry may span multiple
    /// EBBs where the value is live in.
    pub fn liveins<'a>(
        &'a self,
        ctx: LiveRangeContext<'a, PO>,
    ) -> bforest::MapIter<'a, Ebb, Inst, PO> {
        self.liveins.iter(ctx.forest)
    }

    /// Check if this live range overlaps a definition in `ebb`.
    pub fn overlaps_def(
        &self,
        def: ExpandedProgramPoint,
        ebb: Ebb,
        ctx: LiveRangeContext<PO>,
    ) -> bool {
        // Two defs at the same program point always overlap, even if one is dead.
        if def == self.def_begin.into() {
            return true;
        }

        // Check for an overlap with the local (def) interval: def_begin <= def < def_end.
        if ctx.order.cmp(def, self.def_begin) != Ordering::Less &&
            ctx.order.cmp(def, self.def_end) == Ordering::Less
        {
            return true;
        }

        // Check for an overlap with a live-in range.
        match self.livein_local_end(ebb, ctx) {
            Some(inst) => ctx.order.cmp(def, inst) == Ordering::Less,
            None => false,
        }
    }

    /// Check if this live range reaches a use at `user` in `ebb`.
    pub fn reaches_use(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        // Check for an overlap with the local (def) interval: def_begin < user <= def_end.
        if ctx.order.cmp(user, self.def_begin) == Ordering::Greater &&
            ctx.order.cmp(user, self.def_end) != Ordering::Greater
        {
            return true;
        }

        // Check for an overlap with a live-in range (end point is inclusive for uses).
        match self.livein_local_end(ebb, ctx) {
            Some(inst) => ctx.order.cmp(user, inst) != Ordering::Greater,
            None => false,
        }
    }

    /// Check if this live range is killed at `user` in `ebb`.
    pub fn killed_at(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        // A kill is a use exactly at the end of either the def interval or the local
        // live-in interval.
        self.def_local_end() == user.into() || self.livein_local_end(ebb, ctx) == Some(user)
    }
}
|
||||
|
||||
/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
impl<PO: ProgramOrder> SparseMapValue<Value> for GenLiveRange<PO> {
    fn key(&self) -> Value {
        // The defined SSA value uniquely identifies this live range.
        self.value
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{GenLiveRange, LiveRangeContext};
|
||||
use bforest;
|
||||
use entity::EntityRef;
|
||||
use ir::{Ebb, Inst, Value};
|
||||
use ir::{ExpandedProgramPoint, ProgramOrder};
|
||||
use std::cmp::Ordering;
|
||||
use std::vec::Vec;
|
||||
|
||||
    // Dummy program order which simply compares indexes.
    // It is assumed that EBBs have indexes that are multiples of 10, and instructions have indexes
    // in between. `is_ebb_gap` assumes that terminator instructions have indexes of the form
    // ebb * 10 + 1. This is used in the coalesce test.
    // The type is stateless; all ordering is derived from entity indexes.
    struct ProgOrder {}
|
||||
|
||||
    impl ProgramOrder for ProgOrder {
        fn cmp<A, B>(&self, a: A, b: B) -> Ordering
        where
            A: Into<ExpandedProgramPoint>,
            B: Into<ExpandedProgramPoint>,
        {
            // Map any program point to its raw entity index; the mock order is plain
            // index order (EBB headers at multiples of 10, instructions in between).
            fn idx(pp: ExpandedProgramPoint) -> usize {
                match pp {
                    ExpandedProgramPoint::Inst(i) => i.index(),
                    ExpandedProgramPoint::Ebb(e) => e.index(),
                }
            }

            let ia = idx(a.into());
            let ib = idx(b.into());
            ia.cmp(&ib)
        }

        fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool {
            // True when `inst` is a terminator (index ends in 1) and `ebb` is the
            // immediately following block (the next multiple of 10).
            inst.index() % 10 == 1 && ebb.index() / 10 == inst.index() / 10 + 1
        }
    }
|
||||
|
||||
    impl ProgOrder {
        // Get the EBB corresponding to `inst` (round the index down to a multiple of 10).
        fn inst_ebb(&self, inst: Inst) -> Ebb {
            let i = inst.index();
            Ebb::new(i - i % 10)
        }

        // Get the EBB of a program point.
        fn pp_ebb<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Ebb {
            match pp.into() {
                ExpandedProgramPoint::Inst(i) => self.inst_ebb(i),
                ExpandedProgramPoint::Ebb(e) => e,
            }
        }

        // Validate the live range invariants.
        fn validate(
            &self,
            lr: &GenLiveRange<ProgOrder>,
            forest: &bforest::MapForest<Ebb, Inst, ProgOrder>,
        ) {
            // The def interval must cover a single EBB.
            let def_ebb = self.pp_ebb(lr.def_begin);
            assert_eq!(def_ebb, self.pp_ebb(lr.def_end));

            // Check that the def interval isn't backwards.
            match self.cmp(lr.def_begin, lr.def_end) {
                // A dead def (empty interval) must have no live-in intervals.
                Ordering::Equal => assert!(lr.liveins.is_empty()),
                Ordering::Greater => {
                    panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
                }
                Ordering::Less => {}
            }

            // Check the live-in intervals: each must be non-empty, sorted after the
            // previous one, and disjoint from the def EBB.
            let mut prev_end = None;
            for (begin, end) in lr.liveins.iter(forest) {
                assert_eq!(self.cmp(begin, end), Ordering::Less);
                if let Some(e) = prev_end {
                    assert_eq!(self.cmp(e, begin), Ordering::Less);
                }

                assert!(
                    self.cmp(lr.def_end, begin) == Ordering::Less ||
                        self.cmp(lr.def_begin, end) == Ordering::Greater,
                    "Interval can't overlap the def EBB"
                );

                // Save for next round.
                prev_end = Some(end);
            }
        }
    }
|
||||
|
||||
    // Singleton `ProgramOrder` for tests below.
    // `ProgOrder` is zero-sized, so one shared static reference suffices.
    const PO: &'static ProgOrder = &ProgOrder {};
|
||||
|
||||
    // A value defined by an instruction and never used: a dead, local live range.
    #[test]
    fn dead_def_range() {
        let v0 = Value::new(0);
        let e0 = Ebb::new(0);
        let i1 = Inst::new(1);
        let i2 = Inst::new(2);
        let e2 = Ebb::new(2);
        let lr = GenLiveRange::new(v0, i1.into(), Default::default());
        let forest = &bforest::MapForest::new();
        let ctx = LiveRangeContext::new(PO, forest);
        assert!(lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), i1.into());
        assert_eq!(lr.def_local_end(), i1.into());
        assert_eq!(lr.livein_local_end(e2, ctx), None);
        PO.validate(&lr, ctx.forest);

        // A dead live range overlaps its own def program point.
        assert!(lr.overlaps_def(i1.into(), e0, ctx));
        assert!(!lr.overlaps_def(i2.into(), e0, ctx));
        assert!(!lr.overlaps_def(e0.into(), e0, ctx));
    }
|
||||
|
||||
    // A dead EBB argument: defined at the EBB header, never used.
    #[test]
    fn dead_arg_range() {
        let v0 = Value::new(0);
        let e2 = Ebb::new(2);
        let lr = GenLiveRange::new(v0, e2.into(), Default::default());
        let forest = &bforest::MapForest::new();
        let ctx = LiveRangeContext::new(PO, forest);
        assert!(lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), e2.into());
        assert_eq!(lr.def_local_end(), e2.into());
        // The def interval of an EBB argument does not count as live-in.
        assert_eq!(lr.livein_local_end(e2, ctx), None);
        PO.validate(&lr, ctx.forest);
    }
|
||||
|
||||
    // An instruction-defined value used only within its own EBB.
    #[test]
    fn local_def() {
        let v0 = Value::new(0);
        let e10 = Ebb::new(10);
        let i11 = Inst::new(11);
        let i12 = Inst::new(12);
        let i13 = Inst::new(13);
        let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
        let forest = &mut bforest::MapForest::new();

        // Extending inside the def EBB must not report a new live-in block.
        assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
        PO.validate(&lr, forest);
        assert!(!lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), i11.into());
        assert_eq!(lr.def_local_end(), i13.into());

        // Extending to an already covered inst should not change anything.
        assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
        PO.validate(&lr, forest);
        assert_eq!(lr.def(), i11.into());
        assert_eq!(lr.def_local_end(), i13.into());
    }
|
||||
|
||||
#[test]
|
||||
fn local_arg() {
|
||||
let v0 = Value::new(0);
|
||||
let e10 = Ebb::new(10);
|
||||
let i11 = Inst::new(11);
|
||||
let i12 = Inst::new(12);
|
||||
let i13 = Inst::new(13);
|
||||
let mut lr = GenLiveRange::new(v0, e10.into(), Default::default());
|
||||
let forest = &mut bforest::MapForest::new();
|
||||
|
||||
// Extending a dead EBB argument in its own block should not indicate that a live-in
|
||||
// interval was created.
|
||||
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
|
||||
PO.validate(&lr, forest);
|
||||
assert!(!lr.is_dead());
|
||||
assert!(lr.is_local());
|
||||
assert_eq!(lr.def(), e10.into());
|
||||
assert_eq!(lr.def_local_end(), i12.into());
|
||||
|
||||
// Extending to an already covered inst should not change anything.
|
||||
assert_eq!(lr.extend_in_ebb(e10, i11, PO, forest), false);
|
||||
PO.validate(&lr, forest);
|
||||
assert_eq!(lr.def(), e10.into());
|
||||
assert_eq!(lr.def_local_end(), i12.into());
|
||||
|
||||
// Extending further.
|
||||
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
|
||||
PO.validate(&lr, forest);
|
||||
assert_eq!(lr.def(), e10.into());
|
||||
assert_eq!(lr.def_local_end(), i13.into());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn global_def() {
|
||||
let v0 = Value::new(0);
|
||||
let e10 = Ebb::new(10);
|
||||
let i11 = Inst::new(11);
|
||||
let i12 = Inst::new(12);
|
||||
let e20 = Ebb::new(20);
|
||||
let i21 = Inst::new(21);
|
||||
let i22 = Inst::new(22);
|
||||
let i23 = Inst::new(23);
|
||||
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
|
||||
let forest = &mut bforest::MapForest::new();
|
||||
|
||||
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
|
||||
|
||||
// Adding a live-in interval.
|
||||
assert_eq!(lr.extend_in_ebb(e20, i22, PO, forest), true);
|
||||
PO.validate(&lr, forest);
|
||||
assert_eq!(
|
||||
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
|
||||
Some(i22)
|
||||
);
|
||||
|
||||
// Non-extending the live-in.
|
||||
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), false);
|
||||
assert_eq!(
|
||||
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
|
||||
Some(i22)
|
||||
);
|
||||
|
||||
// Extending the existing live-in.
|
||||
assert_eq!(lr.extend_in_ebb(e20, i23, PO, forest), false);
|
||||
PO.validate(&lr, forest);
|
||||
assert_eq!(
|
||||
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
|
||||
Some(i23)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn coalesce() {
|
||||
let v0 = Value::new(0);
|
||||
let i11 = Inst::new(11);
|
||||
let e20 = Ebb::new(20);
|
||||
let i21 = Inst::new(21);
|
||||
let e30 = Ebb::new(30);
|
||||
let i31 = Inst::new(31);
|
||||
let e40 = Ebb::new(40);
|
||||
let i41 = Inst::new(41);
|
||||
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
|
||||
let forest = &mut bforest::MapForest::new();
|
||||
|
||||
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e30, i31)]
|
||||
);
|
||||
|
||||
// Coalesce to previous
|
||||
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e30, i41)]
|
||||
);
|
||||
|
||||
// Coalesce to next
|
||||
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e20, i41)]
|
||||
);
|
||||
|
||||
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
|
||||
|
||||
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e40, i41)]
|
||||
);
|
||||
|
||||
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e20, i21), (e40, i41)]
|
||||
);
|
||||
|
||||
// Coalesce to previous and next
|
||||
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e20, i41)]
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: Add more tests that exercise the binary search algorithm.
|
||||
}
|
||||
23
lib/codegen/src/regalloc/mod.rs
Normal file
23
lib/codegen/src/regalloc/mod.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
//! Register allocation.
|
||||
//!
|
||||
//! This module contains data structures and algorithms used for register allocation.
|
||||
|
||||
pub mod register_set;
|
||||
pub mod coloring;
|
||||
pub mod live_value_tracker;
|
||||
pub mod liveness;
|
||||
pub mod liverange;
|
||||
pub mod virtregs;
|
||||
|
||||
mod affinity;
|
||||
mod coalescing;
|
||||
mod context;
|
||||
mod diversion;
|
||||
mod pressure;
|
||||
mod reload;
|
||||
mod solver;
|
||||
mod spilling;
|
||||
|
||||
pub use self::register_set::RegisterSet;
|
||||
pub use self::context::Context;
|
||||
pub use self::diversion::RegDiversions;
|
||||
377
lib/codegen/src/regalloc/pressure.rs
Normal file
377
lib/codegen/src/regalloc/pressure.rs
Normal file
@@ -0,0 +1,377 @@
|
||||
//! Register pressure tracking.
|
||||
//!
|
||||
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
|
||||
//! sufficiently". This module defines the data structures needed to measure register pressure
|
||||
//! accurately enough to guarantee that the coloring phase will not run out of registers.
|
||||
//!
|
||||
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
|
||||
//! any given program point. This simplistic method has two problems:
|
||||
//!
|
||||
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
|
||||
//! register banks, so we need to at least count the number of live registers in each register
|
||||
//! bank separately.
|
||||
//!
|
||||
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
|
||||
//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
|
||||
//! This makes it difficult to accurately measure register pressure.
|
||||
//!
|
||||
//! This module deals with the problems via *register banks* and *top-level register classes*.
|
||||
//! Register classes in different register banks are completely independent, so we can count
|
||||
//! registers in one bank without worrying about the other bank at all.
|
||||
//!
|
||||
//! All register classes have a unique top-level register class, and we will count registers for
|
||||
//! each top-level register class individually. However, a register bank can have multiple
|
||||
//! top-level register classes that interfere with each other, so all top-level counts need to
|
||||
//! be considered when determining how many more registers can be allocated.
|
||||
//!
|
||||
//! Currently, the only register bank with multiple top-level registers is the `arm32`
|
||||
//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
|
||||
//!
|
||||
//! # Base and transient counts
|
||||
//!
|
||||
//! We maintain two separate register counts per top-level register class: base counts and
|
||||
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
|
||||
//! transient counts are adjusted with `take_transient` and `free_transient`.
|
||||
|
||||
// Remove once we're using the pressure tracker.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
|
||||
use regalloc::RegisterSet;
|
||||
use std::cmp::min;
|
||||
use std::fmt;
|
||||
use std::iter::ExactSizeIterator;
|
||||
|
||||
/// Information per top-level register class.
|
||||
///
|
||||
/// Everything but the counts is static information computed from the constructor arguments.
|
||||
#[derive(Default)]
|
||||
struct TopRC {
|
||||
// Number of registers currently used from this register class.
|
||||
base_count: u32,
|
||||
transient_count: u32,
|
||||
|
||||
// Max number of registers that can be allocated.
|
||||
limit: u32,
|
||||
|
||||
// Register units per register.
|
||||
width: u8,
|
||||
|
||||
// The first aliasing top-level RC.
|
||||
first_toprc: u8,
|
||||
|
||||
// The number of aliasing top-level RCs.
|
||||
num_toprcs: u8,
|
||||
}
|
||||
|
||||
impl TopRC {
|
||||
fn total_count(&self) -> u32 {
|
||||
self.base_count + self.transient_count
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Pressure {
|
||||
// Bit mask of top-level register classes that are aliased by other top-level register classes.
|
||||
// Unaliased register classes can use a simpler interference algorithm.
|
||||
aliased: RegClassMask,
|
||||
|
||||
// Current register counts per top-level register class.
|
||||
toprc: [TopRC; MAX_TRACKED_TOPRCS],
|
||||
}
|
||||
|
||||
impl Pressure {
|
||||
/// Create a new register pressure tracker.
|
||||
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Pressure {
|
||||
let mut p = Pressure {
|
||||
aliased: 0,
|
||||
toprc: Default::default(),
|
||||
};
|
||||
|
||||
// Get the layout of aliasing top-level register classes from the register banks.
|
||||
for bank in reginfo.banks.iter() {
|
||||
let first = bank.first_toprc;
|
||||
let num = bank.num_toprcs;
|
||||
|
||||
if bank.pressure_tracking {
|
||||
for rc in &mut p.toprc[first..first + num] {
|
||||
rc.first_toprc = first as u8;
|
||||
rc.num_toprcs = num as u8;
|
||||
}
|
||||
|
||||
// Flag the top-level register classes with aliases.
|
||||
if num > 1 {
|
||||
p.aliased |= ((1 << num) - 1) << first;
|
||||
}
|
||||
} else {
|
||||
// This bank has no pressure tracking, so its top-level register classes may exceed
|
||||
// `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
|
||||
for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOPRCS)] {
|
||||
// These aren't used if we don't set the `aliased` bit.
|
||||
rc.first_toprc = !0;
|
||||
rc.limit = !0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute per-class limits from `usable`.
|
||||
for (toprc, rc) in p.toprc.iter_mut().take_while(|t| t.num_toprcs > 0).zip(
|
||||
reginfo.classes,
|
||||
)
|
||||
{
|
||||
toprc.limit = usable.iter(rc).len() as u32;
|
||||
toprc.width = rc.width;
|
||||
}
|
||||
|
||||
p
|
||||
}
|
||||
|
||||
/// Check for an available register in the register class `rc`.
|
||||
///
|
||||
/// If it is possible to allocate one more register from `rc`'s top-level register class,
|
||||
/// returns 0.
|
||||
///
|
||||
/// If not, returns a bit-mask of top-level register classes that are interfering. Register
|
||||
/// pressure should be eased in one of the returned top-level register classes before calling
|
||||
/// `can_take()` to check again.
|
||||
fn check_avail(&self, rc: RegClass) -> RegClassMask {
|
||||
let entry = match self.toprc.get(rc.toprc as usize) {
|
||||
None => return 0, // Not a pressure tracked bank.
|
||||
Some(e) => e,
|
||||
};
|
||||
let mask = 1 << rc.toprc;
|
||||
if (self.aliased & mask) == 0 {
|
||||
// This is a simple unaliased top-level register class.
|
||||
if entry.total_count() < entry.limit {
|
||||
0
|
||||
} else {
|
||||
mask
|
||||
}
|
||||
} else {
|
||||
// This is the more complicated case. The top-level register class has aliases.
|
||||
self.check_avail_aliased(entry)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check for an available register in a top-level register class that may have aliases.
|
||||
///
|
||||
/// This is the out-of-line slow path for `check_avail()`.
|
||||
fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
|
||||
let first = usize::from(entry.first_toprc);
|
||||
let num = usize::from(entry.num_toprcs);
|
||||
let width = u32::from(entry.width);
|
||||
let ulimit = entry.limit * width;
|
||||
|
||||
// Count up the number of available register units.
|
||||
let mut units = 0;
|
||||
for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
|
||||
let rcw = u32::from(rc.width);
|
||||
// If `rc.width` is smaller than `width`, each register in `rc` could potentially block
|
||||
// one of ours. This is assuming that none of the smaller registers are straddling the
|
||||
// bigger ones.
|
||||
//
|
||||
// If `rc.width` is larger than `width`, we are also assuming that the registers are
|
||||
// aligned and `rc.width` is a multiple of `width`.
|
||||
let u = if rcw < width {
|
||||
// We can't take more than the total number of register units in the class.
|
||||
// This matters for arm32 S-registers which can only ever lock out 16 D-registers.
|
||||
min(rc.total_count() * width, rc.limit * rcw)
|
||||
} else {
|
||||
rc.total_count() * rcw
|
||||
};
|
||||
|
||||
// If this top-level RC on its own is responsible for exceeding our limit, return it
|
||||
// early to guarantee that registers here are spilled before spilling other registers
|
||||
// unnecessarily.
|
||||
if u >= ulimit {
|
||||
return 1 << rci;
|
||||
}
|
||||
|
||||
units += u;
|
||||
}
|
||||
|
||||
// We've counted up the worst-case number of register units claimed by all aliasing
|
||||
// classes. Compare to the unit limit in this class.
|
||||
if units < ulimit {
|
||||
0
|
||||
} else {
|
||||
// Registers need to be spilled from any one of the aliasing classes.
|
||||
((1 << num) - 1) << first
|
||||
}
|
||||
}
|
||||
|
||||
/// Take a register from `rc`.
|
||||
///
|
||||
/// This does not check if there are enough registers available.
|
||||
pub fn take(&mut self, rc: RegClass) {
|
||||
self.toprc.get_mut(rc.toprc as usize).map(
|
||||
|t| t.base_count += 1,
|
||||
);
|
||||
}
|
||||
|
||||
/// Free a register in `rc`.
|
||||
pub fn free(&mut self, rc: RegClass) {
|
||||
self.toprc.get_mut(rc.toprc as usize).map(
|
||||
|t| t.base_count -= 1,
|
||||
);
|
||||
}
|
||||
|
||||
/// Reset all counts to 0, both base and transient.
|
||||
pub fn reset(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.base_count = 0;
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to increment a transient counter.
|
||||
///
|
||||
/// This will fail if there are not enough registers available.
|
||||
pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
|
||||
let mask = self.check_avail(rc);
|
||||
if mask == 0 {
|
||||
self.toprc.get_mut(rc.toprc as usize).map(|t| {
|
||||
t.transient_count += 1
|
||||
});
|
||||
Ok(())
|
||||
} else {
|
||||
Err(mask)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset all transient counts to 0.
|
||||
pub fn reset_transient(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Preserve the transient counts by transferring them to the base counts.
|
||||
pub fn preserve_transient(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.base_count += e.transient_count;
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Pressure {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "Pressure[")?;
|
||||
for rc in &self.toprc {
|
||||
if rc.limit > 0 && rc.limit < !0 {
|
||||
write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
|
||||
}
|
||||
}
|
||||
write!(f, " ]")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(build_arm32)]
|
||||
mod tests {
|
||||
use super::Pressure;
|
||||
use isa::{RegClass, TargetIsa};
|
||||
use regalloc::RegisterSet;
|
||||
use std::borrow::Borrow;
|
||||
use std::boxed::Box;
|
||||
|
||||
// Make an arm32 `TargetIsa`, if possible.
|
||||
fn arm32() -> Option<Box<TargetIsa>> {
|
||||
use isa;
|
||||
use settings;
|
||||
|
||||
let shared_builder = settings::builder();
|
||||
let shared_flags = settings::Flags::new(&shared_builder);
|
||||
|
||||
isa::lookup("arm32").ok().map(|b| b.finish(shared_flags))
|
||||
}
|
||||
|
||||
// Get a register class by name.
|
||||
fn rc_by_name(isa: &TargetIsa, name: &str) -> RegClass {
|
||||
isa.register_info()
|
||||
.classes
|
||||
.iter()
|
||||
.find(|rc| rc.name == name)
|
||||
.expect("Can't find named register class.")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_counting() {
|
||||
let isa = arm32().expect("This test requires arm32 support");
|
||||
let isa = isa.borrow();
|
||||
let gpr = rc_by_name(isa, "GPR");
|
||||
let s = rc_by_name(isa, "S");
|
||||
let reginfo = isa.register_info();
|
||||
let regs = RegisterSet::new();
|
||||
|
||||
let mut pressure = Pressure::new(®info, ®s);
|
||||
let mut count = 0;
|
||||
while pressure.check_avail(gpr) == 0 {
|
||||
pressure.take(gpr);
|
||||
count += 1;
|
||||
}
|
||||
assert_eq!(count, 16);
|
||||
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
pressure.free(gpr);
|
||||
assert_eq!(pressure.check_avail(gpr), 0);
|
||||
pressure.take(gpr);
|
||||
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
pressure.reset();
|
||||
assert_eq!(pressure.check_avail(gpr), 0);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arm_float_bank() {
|
||||
let isa = arm32().expect("This test requires arm32 support");
|
||||
let isa = isa.borrow();
|
||||
let s = rc_by_name(isa, "S");
|
||||
let d = rc_by_name(isa, "D");
|
||||
let q = rc_by_name(isa, "Q");
|
||||
let reginfo = isa.register_info();
|
||||
let regs = RegisterSet::new();
|
||||
|
||||
let mut pressure = Pressure::new(®info, ®s);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
// Allocating a single S-register should not affect availability.
|
||||
pressure.take(s);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
pressure.take(d);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
pressure.take(q);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
// Take a total of 16 S-regs.
|
||||
for _ in 1..16 {
|
||||
pressure.take(s);
|
||||
}
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
// We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
|
||||
for _ in 0..6 {
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
pressure.take(q);
|
||||
}
|
||||
|
||||
// We've taken 16 S, 1 D, and 7 Qs.
|
||||
assert!(pressure.check_avail(s) != 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert!(pressure.check_avail(q) != 0);
|
||||
}
|
||||
}
|
||||
321
lib/codegen/src/regalloc/register_set.rs
Normal file
321
lib/codegen/src/regalloc/register_set.rs
Normal file
@@ -0,0 +1,321 @@
|
||||
//! Set of allocatable registers as a bit vector of register units.
|
||||
//!
|
||||
//! While allocating registers, we need to keep track of which registers are available and which
|
||||
//! registers are in use. Since registers can alias in different ways, we track this via the
|
||||
//! "register unit" abstraction. Every register contains one or more register units. Registers that
|
||||
//! share a register unit can't be in use at the same time.
|
||||
|
||||
use isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
|
||||
use std::char;
|
||||
use std::fmt;
|
||||
use std::iter::ExactSizeIterator;
|
||||
use std::mem::size_of_val;
|
||||
|
||||
/// Set of registers available for allocation.
|
||||
#[derive(Clone)]
|
||||
pub struct RegisterSet {
|
||||
avail: RegUnitMask,
|
||||
}
|
||||
|
||||
// Given a register class and a register unit in the class, compute a word index and a bit mask of
|
||||
// register units representing that register.
|
||||
//
|
||||
// Note that a register is not allowed to straddle words.
|
||||
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
|
||||
// Bit mask representing the register. It is `rc.width` consecutive units.
|
||||
let width_bits = (1 << rc.width) - 1;
|
||||
// Index into avail[] of the word containing `reg`.
|
||||
let word_index = (reg / 32) as usize;
|
||||
// The actual bits in the word that cover `reg`.
|
||||
let reg_bits = width_bits << (reg % 32);
|
||||
|
||||
(word_index, reg_bits)
|
||||
}
|
||||
|
||||
impl RegisterSet {
|
||||
/// Create a new register set with all registers available.
|
||||
///
|
||||
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
|
||||
/// allocatable registers where reserved registers have been filtered out.
|
||||
pub fn new() -> Self {
|
||||
Self { avail: [!0; 3] }
|
||||
}
|
||||
|
||||
/// Create a new register set with no registers available.
|
||||
pub fn empty() -> Self {
|
||||
Self { avail: [0; 3] }
|
||||
}
|
||||
|
||||
/// Returns `true` if the specified register is available.
|
||||
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
(self.avail[idx] & bits) == bits
|
||||
}
|
||||
|
||||
/// Allocate `reg` from `rc` so it is no longer available.
|
||||
///
|
||||
/// It is an error to take a register that doesn't have all of its register units available.
|
||||
pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
debug_assert!(
|
||||
(self.avail[idx] & bits) == bits,
|
||||
"{}:{} not available in {}",
|
||||
rc,
|
||||
rc.info.display_regunit(reg),
|
||||
self.display(rc.info)
|
||||
);
|
||||
self.avail[idx] &= !bits;
|
||||
}
|
||||
|
||||
/// Return `reg` and all of its register units to the set of available registers.
|
||||
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
debug_assert!(
|
||||
(self.avail[idx] & bits) == 0,
|
||||
"{}:{} not allocated in {}",
|
||||
rc,
|
||||
rc.info.display_regunit(reg),
|
||||
self.display(rc.info)
|
||||
);
|
||||
self.avail[idx] |= bits;
|
||||
}
|
||||
|
||||
/// Return an iterator over all available registers belonging to the register class `rc`.
|
||||
///
|
||||
/// This doesn't allocate anything from the set; use `take()` for that.
|
||||
pub fn iter(&self, rc: RegClass) -> RegSetIter {
|
||||
// Start by copying the RC mask. It is a single set bit for each register in the class.
|
||||
let mut rsi = RegSetIter { regs: rc.mask };
|
||||
|
||||
// Mask out the unavailable units.
|
||||
for idx in 0..self.avail.len() {
|
||||
// If a single unit in a register is unavailable, the whole register can't be used.
|
||||
// If a register straddles a word boundary, it will be marked as unavailable.
|
||||
// There's an assertion in `cdsl/registers.py` to check for that.
|
||||
for i in 0..rc.width {
|
||||
rsi.regs[idx] &= self.avail[idx] >> i;
|
||||
}
|
||||
}
|
||||
rsi
|
||||
}
|
||||
|
||||
/// Check if any register units allocated out of this set interferes with units allocated out
|
||||
/// of `other`.
|
||||
///
|
||||
/// This assumes that unused bits are 1.
|
||||
pub fn interferes_with(&self, other: &RegisterSet) -> bool {
|
||||
self.avail.iter().zip(&other.avail).any(
|
||||
|(&x, &y)| (x | y) != !0,
|
||||
)
|
||||
}
|
||||
|
||||
/// Intersect this set of registers with `other`. This has the effect of removing any register
|
||||
/// units from this set that are not in `other`.
|
||||
pub fn intersect(&mut self, other: &RegisterSet) {
|
||||
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
|
||||
*x &= y;
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this register set, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
|
||||
DisplayRegisterSet(self.clone(), regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over available registers in a register class.
|
||||
pub struct RegSetIter {
|
||||
regs: RegUnitMask,
|
||||
}
|
||||
|
||||
impl Iterator for RegSetIter {
|
||||
type Item = RegUnit;
|
||||
|
||||
fn next(&mut self) -> Option<RegUnit> {
|
||||
let mut unit_offset = 0;
|
||||
|
||||
// Find the first set bit in `self.regs`.
|
||||
for word in &mut self.regs {
|
||||
if *word != 0 {
|
||||
// Compute the register unit number from the lowest set bit in the word.
|
||||
let unit = unit_offset + word.trailing_zeros() as RegUnit;
|
||||
|
||||
// Clear that lowest bit so we won't find it again.
|
||||
*word &= *word - 1;
|
||||
|
||||
return Some(unit);
|
||||
}
|
||||
// How many register units was there in the word? This is a constant 32 for `u32` etc.
|
||||
unit_offset += 8 * size_of_val(word) as RegUnit;
|
||||
}
|
||||
|
||||
// All of `self.regs` is 0.
|
||||
None
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
|
||||
(bits, Some(bits))
|
||||
}
|
||||
}
|
||||
|
||||
impl ExactSizeIterator for RegSetIter {}
|
||||
|
||||
/// Displaying an `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
|
||||
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "[")?;
|
||||
match self.1 {
|
||||
None => {
|
||||
for w in &self.0.avail {
|
||||
write!(f, " #{:08x}", w)?;
|
||||
}
|
||||
}
|
||||
Some(reginfo) => {
|
||||
let toprcs = reginfo
|
||||
.banks
|
||||
.iter()
|
||||
.map(|b| b.first_toprc + b.num_toprcs)
|
||||
.max()
|
||||
.expect("No register banks");
|
||||
for rc in ®info.classes[0..toprcs] {
|
||||
if rc.width == 1 {
|
||||
let bank = ®info.banks[rc.bank as usize];
|
||||
write!(f, " {}: ", rc)?;
|
||||
for offset in 0..bank.units {
|
||||
let reg = bank.first_unit + offset;
|
||||
if !rc.contains(reg) {
|
||||
continue;
|
||||
}
|
||||
if !self.0.is_avail(rc, reg) {
|
||||
write!(f, "-")?;
|
||||
continue;
|
||||
}
|
||||
// Display individual registers as either the second letter of their
|
||||
// name or the last digit of their number.
|
||||
// This works for x86 (rax, rbx, ...) and for numbered regs.
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
bank.names
|
||||
.get(offset as usize)
|
||||
.and_then(|name| name.chars().nth(1))
|
||||
.unwrap_or_else(
|
||||
|| char::from_digit(u32::from(offset % 10), 10).unwrap(),
|
||||
)
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
write!(f, " ]")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for RegisterSet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.display(None).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use isa::registers::{RegClass, RegClassData};
|
||||
use std::vec::Vec;
|
||||
|
||||
// Register classes for testing.
|
||||
const GPR: RegClass = &RegClassData {
|
||||
name: "GPR",
|
||||
index: 0,
|
||||
width: 1,
|
||||
bank: 0,
|
||||
toprc: 0,
|
||||
first: 28,
|
||||
subclasses: 0,
|
||||
mask: [0xf0000000, 0x0000000f, 0],
|
||||
info: &INFO,
|
||||
};
|
||||
|
||||
const DPR: RegClass = &RegClassData {
|
||||
name: "DPR",
|
||||
index: 0,
|
||||
width: 2,
|
||||
bank: 0,
|
||||
toprc: 0,
|
||||
first: 28,
|
||||
subclasses: 0,
|
||||
mask: [0x50000000, 0x0000000a, 0],
|
||||
info: &INFO,
|
||||
};
|
||||
|
||||
const INFO: RegInfo = RegInfo {
|
||||
banks: &[],
|
||||
classes: &[],
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn put_and_take() {
|
||||
let mut regs = RegisterSet::new();
|
||||
|
||||
// `GPR` has units 28-36.
|
||||
assert_eq!(regs.iter(GPR).len(), 8);
|
||||
assert_eq!(regs.iter(GPR).count(), 8);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
|
||||
|
||||
assert!(regs.is_avail(GPR, 29));
|
||||
regs.take(&GPR, 29);
|
||||
assert!(!regs.is_avail(GPR, 29));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 7);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
|
||||
|
||||
assert!(regs.is_avail(GPR, 30));
|
||||
regs.take(&GPR, 30);
|
||||
assert!(!regs.is_avail(GPR, 30));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 6);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
|
||||
|
||||
assert!(regs.is_avail(GPR, 32));
|
||||
regs.take(&GPR, 32);
|
||||
assert!(!regs.is_avail(GPR, 32));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 5);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
|
||||
|
||||
regs.free(&GPR, 30);
|
||||
assert!(regs.is_avail(GPR, 30));
|
||||
assert!(!regs.is_avail(GPR, 29));
|
||||
assert!(!regs.is_avail(GPR, 32));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 6);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
|
||||
|
||||
regs.free(&GPR, 32);
|
||||
assert!(regs.is_avail(GPR, 31));
|
||||
assert!(!regs.is_avail(GPR, 29));
|
||||
assert!(regs.is_avail(GPR, 32));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 7);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn interference() {
|
||||
let mut regs1 = RegisterSet::new();
|
||||
let mut regs2 = RegisterSet::new();
|
||||
|
||||
assert!(!regs1.interferes_with(®s2));
|
||||
regs1.take(&GPR, 32);
|
||||
assert!(!regs1.interferes_with(®s2));
|
||||
regs2.take(&GPR, 31);
|
||||
assert!(!regs1.interferes_with(®s2));
|
||||
regs1.intersect(®s2);
|
||||
assert!(regs1.interferes_with(®s2));
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user