Merge remote-tracking branch 'origin/master' into no_std

This commit is contained in:
Dan Gohman
2018-03-30 15:14:30 -07:00
260 changed files with 4509 additions and 5074 deletions

View File

@@ -1,13 +1,13 @@
[package]
authors = ["The Cretonne Project Developers"]
name = "cretonne"
version = "0.3.4"
version = "0.4.1"
description = "Low-level code generator library"
license = "Apache-2.0"
documentation = "https://cretonne.readthedocs.io/"
repository = "https://github.com/Cretonne/cretonne"
readme = "README.md"
keywords = [ "compile", "compiler", "jit" ]
keywords = ["compile", "compiler", "jit"]
build = "build.rs"
[lib]
@@ -32,3 +32,7 @@ optional = true
default = ["std"]
std = []
core = ["hashmap_core"]
[badges]
maintenance = { status = "experimental" }
travis-ci = { repository = "Cretonne/cretonne" }

View File

@@ -1,2 +1,2 @@
This crate contains the core Cretonne code generator. It translates code from an
intermediate language into executable machine code.
intermediate representation into executable machine code.

View File

@@ -18,7 +18,6 @@
// The build script expects to be run from the directory where this build.rs file lives. The
// current directory is used to find the sources.
use std::env;
use std::process;

View File

@@ -2,7 +2,7 @@
The cretonne.formats defines all instruction formats.
Every instruction format has a corresponding `InstructionData` variant in the
Rust representation of cretonne IL, so all instruction formats must be defined
Rust representation of Cretonne IR, so all instruction formats must be defined
in this module.
"""
from __future__ import absolute_import

View File

@@ -588,6 +588,9 @@ stack_check = Instruction(
Read the stack limit from ``GV`` and compare it to the stack pointer. If
the stack pointer has reached or exceeded the limit, generate a trap with a
``stk_ovf`` code.
The global variable must be accessible and naturally aligned for a
pointer-sized value.
""",
ins=GV, can_trap=True)

View File

@@ -41,6 +41,8 @@ widen = XFormGroup('widen', """
The transformations in the 'widen' group work by expressing
instructions in terms of larger types.
This group is not yet implemented.
""")
expand = XFormGroup('expand', """

View File

@@ -20,10 +20,10 @@ opt_level = EnumSetting(
enable_verifier = BoolSetting(
"""
Run the Cretonne IL verifier at strategic times during compilation.
Run the Cretonne IR verifier at strategic times during compilation.
This makes compilation slower but catches many bugs. The verifier is
disabled by default, except when reading Cretonne IL from a text file.
disabled by default, except when reading Cretonne IR from a text file.
""",
default=True)

View File

@@ -14,19 +14,27 @@ import gen_legalizer
import gen_registers
import gen_binemit
parser = argparse.ArgumentParser(description='Generate sources for Cretonne.')
parser.add_argument('--out-dir', help='set output directory')
args = parser.parse_args()
out_dir = args.out_dir
def main():
# type: () -> None
parser = argparse.ArgumentParser(
description='Generate sources for Cretonne.')
parser.add_argument('--out-dir', help='set output directory')
isas = isa.all_isas()
args = parser.parse_args()
out_dir = args.out_dir
gen_types.generate(out_dir)
gen_instr.generate(isas, out_dir)
gen_settings.generate(isas, out_dir)
gen_encoding.generate(isas, out_dir)
gen_legalizer.generate(isas, out_dir)
gen_registers.generate(isas, out_dir)
gen_binemit.generate(isas, out_dir)
gen_build_deps.generate()
isas = isa.all_isas()
gen_types.generate(out_dir)
gen_instr.generate(isas, out_dir)
gen_settings.generate(isas, out_dir)
gen_encoding.generate(isas, out_dir)
gen_legalizer.generate(isas, out_dir)
gen_registers.generate(isas, out_dir)
gen_binemit.generate(isas, out_dir)
gen_build_deps.generate()
if __name__ == "__main__":
main()

View File

@@ -559,7 +559,7 @@ class Enumerator(Literal):
is an AST leaf node representing one of the values.
:param kind: The enumerated `ImmediateKind` containing the value.
:param value: The textual IL representation of the value.
:param value: The textual IR representation of the value.
`Enumerator` nodes are not usually created directly. They are created by
using the dot syntax on immediate kinds: `intcc.ult`.

View File

@@ -12,7 +12,7 @@ from .instructions import InstructionGroup
try:
from typing import Tuple, Union, Any, Iterable, Sequence, List, Set, Dict, TYPE_CHECKING # noqa
if TYPE_CHECKING:
from .instructions import MaybeBoundInst, InstructionGroup, InstructionFormat # noqa
from .instructions import MaybeBoundInst, InstructionFormat # noqa
from .predicates import PredNode, PredKey # noqa
from .settings import SettingGroup # noqa
from .registers import RegBank # noqa
@@ -172,8 +172,7 @@ class TargetISA(object):
"""
for cpumode in self.cpumodes:
self.legalize_code(cpumode.default_legalize)
for x in sorted(cpumode.type_legalize.values(),
key=lambda x: x.name):
for x in cpumode.type_legalize.values():
self.legalize_code(x)
def legalize_code(self, xgrp):
@@ -232,7 +231,7 @@ class CPUMode(object):
# Tables for configuring legalization actions when no valid encoding
# exists for an instruction.
self.default_legalize = None # type: XFormGroup
self.type_legalize = dict() # type: Dict[ValueType, XFormGroup]
self.type_legalize = OrderedDict() # type: OrderedDict[ValueType, XFormGroup] # noqa
def __str__(self):
# type: () -> str

View File

@@ -2,7 +2,7 @@
set -euo pipefail
cd $(dirname "$0")
runif() {
function runif() {
if command -v "$1" > /dev/null; then
echo " === $1 ==="
"$@"

View File

@@ -152,7 +152,7 @@ def gen_isa(isa, fmt):
fmt.line('let bits = encoding.bits();')
with fmt.indented('match func.encodings[inst].recipe() {', '}'):
for i, recipe in enumerate(isa.all_recipes):
fmt.comment(recipe.name)
fmt.comment('Recipe {}'.format(recipe.name))
with fmt.indented('{} => {{'.format(i), '}'):
gen_recipe(recipe, fmt)
fmt.line('_ => {},')

View File

@@ -600,8 +600,8 @@ def make_tables(cpumode):
table[ty][inst].encodings.append(enc)
# Ensure there are level 1 table entries for all types with a custom
# legalize action. Try to be stable relative to dict ordering.
for ty in sorted(cpumode.type_legalize.keys(), key=str):
# legalize action.
for ty in cpumode.type_legalize.keys():
table[ty]
return table
@@ -756,7 +756,7 @@ def emit_recipe_constraints(isa, fmt):
'static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = ['
.format(len(isa.all_recipes)), '];'):
for r in isa.all_recipes:
fmt.comment(r.name)
fmt.comment('Constraints for recipe {}:'.format(r.name))
tied_i2o, tied_o2i = r.ties()
fixed_ins, fixed_outs = r.fixed_ops()
with fmt.indented('RecipeConstraints {', '},'):
@@ -830,7 +830,7 @@ def emit_recipe_sizing(isa, fmt):
'static RECIPE_SIZING: [RecipeSizing; {}] = ['
.format(len(isa.all_recipes)), '];'):
for r in isa.all_recipes:
fmt.comment(r.name)
fmt.comment('Code size information for recipe {}:'.format(r.name))
with fmt.indented('RecipeSizing {', '},'):
fmt.format('bytes: {},', r.size)
if r.branch_range:

View File

@@ -49,11 +49,11 @@ def gen_formats(fmt):
with fmt.indented(
"fn from(inst: &'a InstructionData) -> InstructionFormat {",
'}'):
with fmt.indented('match *inst {', '}'):
for f in InstructionFormat.all_formats:
fmt.line(('InstructionData::{} {{ .. }} => ' +
'InstructionFormat::{},')
.format(f.name, f.name))
m = srcgen.Match('*inst')
for f in InstructionFormat.all_formats:
m.arm('InstructionData::' + f.name, ['..'],
'InstructionFormat::' + f.name)
fmt.match(m)
fmt.line()
@@ -74,33 +74,64 @@ def gen_arguments_method(fmt, is_mut):
'pool: &\'a {m}ir::ValueListPool) -> '
'&{m}[Value] {{'
.format(f=method, m=mut), '}'):
with fmt.indented('match *self {', '}'):
for f in InstructionFormat.all_formats:
n = 'InstructionData::' + f.name
m = srcgen.Match('*self')
for f in InstructionFormat.all_formats:
n = 'InstructionData::' + f.name
# Formats with a value list put all of their arguments in the
# list. We don't split them up, just return it all as variable
# arguments. (I expect the distinction to go away).
if f.has_value_list:
arg = ''.format(mut)
fmt.line(
'{} {{ ref {}args, .. }} => args.{}(pool),'
.format(n, mut, as_slice))
continue
# Formats with a value list put all of their arguments in the
# list. We don't split them up, just return it all as variable
# arguments. (I expect the distinction to go away).
if f.has_value_list:
m.arm(n, ['ref {}args'.format(mut), '..'],
'args.{}(pool)'.format(as_slice))
continue
# Fixed args.
if f.num_value_operands == 0:
arg = '&{}[]'.format(mut)
capture = ''
# Fixed args.
fields = []
if f.num_value_operands == 0:
arg = '&{}[]'.format(mut)
elif f.num_value_operands == 1:
fields.append('ref {}arg'.format(mut))
arg = '{}(arg)'.format(rslice)
else:
args = 'args_arity{}'.format(f.num_value_operands)
fields.append('args: ref {}{}'.format(mut, args))
arg = args
fields.append('..')
m.arm(n, fields, arg)
fmt.match(m)
def gen_instruction_data(fmt):
# type: (srcgen.Formatter) -> None
"""
Generate the InstructionData enum.
Every variant must contain `opcode` and `ty` fields. An instruction that
doesn't produce a value should have its `ty` field set to `VOID`. The size
of `InstructionData` should be kept at 16 bytes on 64-bit architectures. If
more space is needed to represent an instruction, use a `Box<AuxData>` to
store the additional information out of line.
"""
fmt.line('#[derive(Clone, Debug, Hash, PartialEq, Eq)]')
fmt.line('#[allow(missing_docs)]')
with fmt.indented('pub enum InstructionData {', '}'):
for f in InstructionFormat.all_formats:
with fmt.indented('{} {{'.format(f.name), '},'):
fmt.line('opcode: Opcode,')
if f.typevar_operand is None:
pass
elif f.has_value_list:
fmt.line('args: ValueList,')
elif f.num_value_operands == 1:
capture = 'ref {}arg, '.format(mut)
arg = '{}(arg)'.format(rslice)
fmt.line('arg: Value,')
else:
capture = 'ref {}args, '.format(mut)
arg = 'args'
fmt.line(
'{} {{ {}.. }} => {},'
.format(n, capture, arg))
fmt.line('args: [Value; {}],'.format(f.num_value_operands))
for field in f.imm_fields:
fmt.line(
'{}: {},'
.format(field.member, field.kind.rust_type))
def gen_instruction_data_impl(fmt):
@@ -123,39 +154,37 @@ def gen_instruction_data_impl(fmt):
with fmt.indented('impl InstructionData {', '}'):
fmt.doc_comment('Get the opcode of this instruction.')
with fmt.indented('pub fn opcode(&self) -> Opcode {', '}'):
with fmt.indented('match *self {', '}'):
for f in InstructionFormat.all_formats:
fmt.line(
'InstructionData::{} {{ opcode, .. }} => opcode,'
.format(f.name))
m = srcgen.Match('*self')
for f in InstructionFormat.all_formats:
m.arm('InstructionData::' + f.name, ['opcode', '..'],
'opcode')
fmt.match(m)
fmt.line()
fmt.doc_comment('Get the controlling type variable operand.')
with fmt.indented(
'pub fn typevar_operand(&self, pool: &ir::ValueListPool) -> '
'Option<Value> {', '}'):
with fmt.indented('match *self {', '}'):
for f in InstructionFormat.all_formats:
n = 'InstructionData::' + f.name
if f.typevar_operand is None:
fmt.line(n + ' { .. } => None,')
elif f.has_value_list:
# We keep all arguments in a value list.
i = f.typevar_operand
fmt.line(
'{} {{ ref args, .. }} => '
'args.get({}, pool),'.format(n, i))
elif f.num_value_operands == 1:
# We have a single value operand called 'arg'.
fmt.line(n + ' { arg, .. } => Some(arg),')
else:
# We have multiple value operands and an array `args`.
# Which `args` index to use?
i = f.typevar_operand
fmt.line(
n +
' {{ ref args, .. }} => Some(args[{}]),'
.format(i))
m = srcgen.Match('*self')
for f in InstructionFormat.all_formats:
n = 'InstructionData::' + f.name
if f.typevar_operand is None:
m.arm(n, ['..'], 'None')
elif f.has_value_list:
# We keep all arguments in a value list.
i = f.typevar_operand
m.arm(n, ['ref args', '..'],
'args.get({}, pool)'.format(i))
elif f.num_value_operands == 1:
# We have a single value operand called 'arg'.
m.arm(n, ['arg', '..'], 'Some(arg)')
else:
# We have multiple value operands and an array `args`.
# Which `args` index to use?
args = 'args_arity{}'.format(f.num_value_operands)
m.arm(n, ['args: ref {}'.format(args), '..'],
'Some({}[{}])'.format(args, f.typevar_operand))
fmt.match(m)
fmt.line()
fmt.doc_comment(
@@ -184,13 +213,13 @@ def gen_instruction_data_impl(fmt):
with fmt.indented(
'pub fn take_value_list(&mut self) -> Option<ir::ValueList> {',
'}'):
with fmt.indented('match *self {', '}'):
for f in InstructionFormat.all_formats:
n = 'InstructionData::' + f.name
if f.has_value_list:
fmt.line(
n + ' { ref mut args, .. } => Some(args.take()),')
fmt.line('_ => None,')
m = srcgen.Match('*self')
for f in InstructionFormat.all_formats:
n = 'InstructionData::' + f.name
if f.has_value_list:
m.arm(n, ['ref mut args', '..'], 'Some(args.take())')
m.arm('_', [], 'None')
fmt.match(m)
fmt.line()
fmt.doc_comment(
@@ -275,14 +304,12 @@ def gen_opcodes(groups, fmt):
fmt.doc_comment(Instruction.ATTRIBS[attr])
with fmt.indented('pub fn {}(self) -> bool {{'
.format(attr), '}'):
with fmt.indented('match self {', '}'):
for i in instrs:
if getattr(i, attr):
fmt.format(
'Opcode::{} => true,',
i.camel_name, i.name)
fmt.line('_ => false,')
m = srcgen.Match('self')
for i in instrs:
if getattr(i, attr):
m.arm('Opcode::' + i.camel_name, [], 'true')
m.arm('_', [], 'false')
fmt.match(m)
fmt.line()
fmt.line()
@@ -299,9 +326,10 @@ def gen_opcodes(groups, fmt):
# Generate a private opcode_name function.
with fmt.indented('fn opcode_name(opc: Opcode) -> &\'static str {', '}'):
with fmt.indented('match opc {', '}'):
for i in instrs:
fmt.format('Opcode::{} => "{}",', i.camel_name, i.name)
m = srcgen.Match('opc')
for i in instrs:
m.arm('Opcode::' + i.camel_name, [], '"{}"'.format(i.name))
fmt.match(m)
fmt.line()
# Generate an opcode hash table for looking up opcodes by name.
@@ -655,7 +683,7 @@ def gen_builder(insts, fmt):
fmt.doc_comment("""
Convenience methods for building instructions.
The `InstrBuilder` trait has one method per instruction opcode for
The `InstBuilder` trait has one method per instruction opcode for
conveniently constructing the instruction with minimum arguments.
Polymorphic instructions infer their result types from the input
arguments when possible. In some cases, an explicit `ctrl_typevar`
@@ -682,13 +710,15 @@ def generate(isas, out_dir):
# opcodes.rs
fmt = srcgen.Formatter()
gen_formats(fmt)
gen_instruction_data(fmt)
fmt.line()
gen_instruction_data_impl(fmt)
fmt.line()
instrs = gen_opcodes(groups, fmt)
gen_type_constraints(fmt, instrs)
fmt.update_file('opcodes.rs', out_dir)
# builder.rs
# inst_builder.rs
fmt = srcgen.Formatter()
gen_builder(instrs, fmt)
fmt.update_file('builder.rs', out_dir)
fmt.update_file('inst_builder.rs', out_dir)

View File

@@ -103,18 +103,19 @@ def emit_runtime_typecheck(check, fmt, type_sets):
base_exp = build_derived_expr(tv.base)
if (tv.derived_func == TypeVar.LANEOF):
return "{}.map(|t: Type| -> t.lane_type())".format(base_exp)
return "{}.map(|t: ir::Type| t.lane_type())".format(base_exp)
elif (tv.derived_func == TypeVar.ASBOOL):
return "{}.map(|t: Type| -> t.as_bool())".format(base_exp)
return "{}.map(|t: ir::Type| t.as_bool())".format(base_exp)
elif (tv.derived_func == TypeVar.HALFWIDTH):
return "{}.and_then(|t: Type| -> t.half_width())".format(base_exp)
return "{}.and_then(|t: ir::Type| t.half_width())".format(base_exp)
elif (tv.derived_func == TypeVar.DOUBLEWIDTH):
return "{}.and_then(|t: Type| -> t.double_width())"\
return "{}.and_then(|t: ir::Type| t.double_width())"\
.format(base_exp)
elif (tv.derived_func == TypeVar.HALFVECTOR):
return "{}.and_then(|t: Type| -> t.half_vector())".format(base_exp)
return "{}.and_then(|t: ir::Type| t.half_vector())"\
.format(base_exp)
elif (tv.derived_func == TypeVar.DOUBLEVECTOR):
return "{}.and_then(|t: Type| -> t.by(2))".format(base_exp)
return "{}.and_then(|t: ir::Type| t.by(2))".format(base_exp)
else:
assert False, "Unknown derived function {}".format(tv.derived_func)

View File

@@ -28,7 +28,7 @@ def gen_enum_types(sgrp, fmt):
if not isinstance(setting, EnumSetting):
continue
ty = camel_case(setting.name)
fmt.doc_comment('Values for {}.'.format(setting))
fmt.doc_comment('Values for `{}`.'.format(setting))
fmt.line('#[derive(Debug, PartialEq, Eq)]')
with fmt.indented('pub enum {} {{'.format(ty), '}'):
for v in setting.values:
@@ -57,12 +57,11 @@ def gen_getter(setting, sgrp, fmt):
ty = camel_case(setting.name)
proto = 'pub fn {}(&self) -> {}'.format(setting.name, ty)
with fmt.indented(proto + ' {', '}'):
with fmt.indented(
'match self.bytes[{}] {{'
.format(setting.byte_offset), '}'):
for i, v in enumerate(setting.values):
fmt.line('{} => {}::{},'.format(i, ty, camel_case(v)))
fmt.line('_ => panic!("Invalid enum value"),')
m = srcgen.Match('self.bytes[{}]'.format(setting.byte_offset))
for i, v in enumerate(setting.values):
m.arm(str(i), [], '{}::{}'.format(ty, camel_case(v)))
m.arm('_', [], 'panic!("Invalid enum value")')
fmt.match(m)
else:
raise AssertionError("Unknown setting kind")

View File

@@ -12,8 +12,8 @@ from base.immediates import floatcc
ISA = TargetISA('intel', [base.instructions.GROUP, x86.GROUP])
# CPU modes for 32-bit and 64-bit operation.
I64 = CPUMode('I64', ISA)
I32 = CPUMode('I32', ISA)
X86_64 = CPUMode('I64', ISA)
X86_32 = CPUMode('I32', ISA)
# The set of floating point condition codes that are directly supported.
# Other condition codes need to be reversed or expressed as two tests.

View File

@@ -5,7 +5,7 @@ from __future__ import absolute_import
from cdsl.predicates import IsUnsignedInt, Not, And
from base import instructions as base
from base.formats import UnaryImm
from .defs import I32, I64
from .defs import X86_64, X86_32
from . import recipes as r
from . import settings as cfg
from . import instructions as x86
@@ -22,83 +22,83 @@ except ImportError:
pass
I32.legalize_monomorphic(expand_flags)
I32.legalize_type(
default=narrow,
b1=expand_flags,
i32=intel_expand,
f32=intel_expand,
f64=intel_expand)
X86_32.legalize_monomorphic(expand_flags)
X86_32.legalize_type(
default=narrow,
b1=expand_flags,
i32=intel_expand,
f32=intel_expand,
f64=intel_expand)
I64.legalize_monomorphic(expand_flags)
I64.legalize_type(
default=narrow,
b1=expand_flags,
i32=intel_expand,
i64=intel_expand,
f32=intel_expand,
f64=intel_expand)
X86_64.legalize_monomorphic(expand_flags)
X86_64.legalize_type(
default=narrow,
b1=expand_flags,
i32=intel_expand,
i64=intel_expand,
f32=intel_expand,
f64=intel_expand)
#
# Helper functions for generating encodings.
#
def enc_i64(inst, recipe, *args, **kwargs):
def enc_x86_64(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst` to I64 with and without a REX prefix.
Add encodings for `inst` to X86_64 with and without a REX prefix.
"""
I64.enc(inst, *recipe.rex(*args, **kwargs))
I64.enc(inst, *recipe(*args, **kwargs))
X86_64.enc(inst, *recipe.rex(*args, **kwargs))
X86_64.enc(inst, *recipe(*args, **kwargs))
def enc_both(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
"""
Add encodings for `inst` to both I32 and I64.
Add encodings for `inst` to both X86_32 and X86_64.
"""
I32.enc(inst, *recipe(*args, **kwargs))
enc_i64(inst, recipe, *args, **kwargs)
X86_32.enc(inst, *recipe(*args, **kwargs))
enc_x86_64(inst, recipe, *args, **kwargs)
def enc_i32_i64(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst.i32` to I32.
Add encodings for `inst.i32` to I64 with and without REX.
Add encodings for `inst.i64` to I64 with a REX.W prefix.
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
"""
I32.enc(inst.i32, *recipe(*args, **kwargs))
X86_32.enc(inst.i32, *recipe(*args, **kwargs))
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
I64.enc(inst.i32, *recipe.rex(*args, **kwargs))
I64.enc(inst.i32, *recipe(*args, **kwargs))
X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i32, *recipe(*args, **kwargs))
I64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
# type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst.i32` to I32.
Add encodings for `inst.i32` to I64 with and without REX.
Add encodings for `inst.i64` to I64 with a REX prefix, using the `w_bit`
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
argument to determine whether or not to set the REX.W bit.
"""
I32.enc(inst.i32.any, *recipe(*args, **kwargs))
X86_32.enc(inst.i32.any, *recipe(*args, **kwargs))
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
I64.enc(inst.i32.any, *recipe.rex(*args, **kwargs))
I64.enc(inst.i32.any, *recipe(*args, **kwargs))
X86_64.enc(inst.i32.any, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i32.any, *recipe(*args, **kwargs))
if w_bit:
I64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs))
X86_64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs))
else:
I64.enc(inst.i64.any, *recipe.rex(*args, **kwargs))
I64.enc(inst.i64.any, *recipe(*args, **kwargs))
X86_64.enc(inst.i64.any, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i64.any, *recipe(*args, **kwargs))
for inst, opc in [
@@ -141,19 +141,22 @@ for inst, rrr in [
# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
# Immediate constants.
I32.enc(base.iconst.i32, *r.puid(0xb8))
X86_32.enc(base.iconst.i32, *r.puid(0xb8))
I64.enc(base.iconst.i32, *r.puid.rex(0xb8))
I64.enc(base.iconst.i32, *r.puid(0xb8))
X86_64.enc(base.iconst.i32, *r.puid.rex(0xb8))
X86_64.enc(base.iconst.i32, *r.puid(0xb8))
# The 32-bit immediate movl also zero-extends to 64 bits.
I64.enc(base.iconst.i64, *r.puid.rex(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
I64.enc(base.iconst.i64, *r.puid(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
X86_64.enc(base.iconst.i64, *r.puid.rex(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
X86_64.enc(base.iconst.i64, *r.puid(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
# Sign-extended 32-bit immediate.
I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
X86_64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
X86_64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
# bool constants.
enc_both(base.bconst.b1, r.puid_bool, 0xb8)
# Shifts and rotates.
# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
@@ -164,38 +167,46 @@ for inst, rrr in [
(base.ishl, 4),
(base.ushr, 5),
(base.sshr, 7)]:
I32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
I64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1))
I64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr))
I64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
# Cannot use enc_i32_i64 for this pattern because instructions require
# .any suffix.
X86_32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
X86_64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1))
X86_64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr))
X86_64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
for inst, rrr in [
(base.ishl_imm, 4),
(base.ushr_imm, 5),
(base.sshr_imm, 7)]:
enc_i32_i64(inst, r.rib, 0xc1, rrr=rrr)
# Population count.
I32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
I64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),
isap=cfg.use_popcnt)
I64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
I64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),
isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
# Count leading zero bits.
I32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
I64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1),
isap=cfg.use_lzcnt)
I64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
I64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
X86_32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1),
isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
# Count trailing zero bits.
I32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
I64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
isap=cfg.use_bmi1)
I64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
X86_32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
#
# Loads and stores.
#
for recipe in [r.st, r.stDisp8, r.stDisp32]:
enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
enc_i64(base.istore32.i64.any, recipe, 0x89)
enc_x86_64(base.istore32.i64.any, recipe, 0x89)
enc_i32_i64_ld_st(base.istore16, False, recipe, 0x66, 0x89)
# Byte stores are more complicated because the registers they can address
@@ -203,121 +214,121 @@ for recipe in [r.st, r.stDisp8, r.stDisp32]:
# the corresponding st* recipes when a REX prefix is applied.
for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]:
enc_both(base.istore8.i32.any, recipe, 0x88)
enc_i64(base.istore8.i64.any, recipe, 0x88)
enc_x86_64(base.istore8.i64.any, recipe, 0x88)
enc_i32_i64(base.spill, r.spSib32, 0x89)
enc_i32_i64(base.regspill, r.rsp32, 0x89)
enc_i32_i64(base.spill, r.spillSib32, 0x89)
enc_i32_i64(base.regspill, r.regspill32, 0x89)
# Use a 32-bit write for spilling `b1` to avoid constraining the permitted
# registers.
# See MIN_SPILL_SLOT_SIZE which makes this safe.
enc_both(base.spill.b1, r.spSib32, 0x89)
enc_both(base.regspill.b1, r.rsp32, 0x89)
enc_both(base.spill.b1, r.spillSib32, 0x89)
enc_both(base.regspill.b1, r.regspill32, 0x89)
for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
enc_i64(base.uload32.i64, recipe, 0x8b)
I64.enc(base.sload32.i64, *recipe.rex(0x63, w=1))
enc_x86_64(base.uload32.i64, recipe, 0x8b)
X86_64.enc(base.sload32.i64, *recipe.rex(0x63, w=1))
enc_i32_i64_ld_st(base.uload16, True, recipe, 0x0f, 0xb7)
enc_i32_i64_ld_st(base.sload16, True, recipe, 0x0f, 0xbf)
enc_i32_i64_ld_st(base.uload8, True, recipe, 0x0f, 0xb6)
enc_i32_i64_ld_st(base.sload8, True, recipe, 0x0f, 0xbe)
enc_i32_i64(base.fill, r.fiSib32, 0x8b)
enc_i32_i64(base.regfill, r.rfi32, 0x8b)
enc_i32_i64(base.fill, r.fillSib32, 0x8b)
enc_i32_i64(base.regfill, r.regfill32, 0x8b)
# Load 32 bits from `b1` spill slots. See `spill.b1` above.
enc_both(base.fill.b1, r.fiSib32, 0x8b)
enc_both(base.regfill.b1, r.rfi32, 0x8b)
enc_both(base.fill.b1, r.fillSib32, 0x8b)
enc_both(base.regfill.b1, r.regfill32, 0x8b)
# Push and Pop
I32.enc(x86.push.i32, *r.pushq(0x50))
enc_i64(x86.push.i64, r.pushq, 0x50)
X86_32.enc(x86.push.i32, *r.pushq(0x50))
enc_x86_64(x86.push.i64, r.pushq, 0x50)
I32.enc(x86.pop.i32, *r.popq(0x58))
enc_i64(x86.pop.i64, r.popq, 0x58)
X86_32.enc(x86.pop.i32, *r.popq(0x58))
enc_x86_64(x86.pop.i64, r.popq, 0x58)
# Copy Special
I64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
I32.enc(base.copy_special, *r.copysp(0x89))
X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
X86_32.enc(base.copy_special, *r.copysp(0x89))
# Adjust SP Imm
I32.enc(base.adjust_sp_imm, *r.adjustsp8(0x83))
I32.enc(base.adjust_sp_imm, *r.adjustsp32(0x81))
I64.enc(base.adjust_sp_imm, *r.adjustsp8.rex(0x83, w=1))
I64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1))
X86_32.enc(base.adjust_sp_imm, *r.adjustsp8(0x83))
X86_32.enc(base.adjust_sp_imm, *r.adjustsp32(0x81))
X86_64.enc(base.adjust_sp_imm, *r.adjustsp8.rex(0x83, w=1))
X86_64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1))
#
# Float loads and stores.
#
enc_both(base.load.f32.any, r.fld, 0x66, 0x0f, 0x6e)
enc_both(base.load.f32.any, r.fldDisp8, 0x66, 0x0f, 0x6e)
enc_both(base.load.f32.any, r.fldDisp32, 0x66, 0x0f, 0x6e)
enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fld, 0xf3, 0x0f, 0x7e)
enc_both(base.load.f64.any, r.fldDisp8, 0xf3, 0x0f, 0x7e)
enc_both(base.load.f64.any, r.fldDisp32, 0xf3, 0x0f, 0x7e)
enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.store.f32.any, r.fst, 0x66, 0x0f, 0x7e)
enc_both(base.store.f32.any, r.fstDisp8, 0x66, 0x0f, 0x7e)
enc_both(base.store.f32.any, r.fstDisp32, 0x66, 0x0f, 0x7e)
enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fst, 0x66, 0x0f, 0xd6)
enc_both(base.store.f64.any, r.fstDisp8, 0x66, 0x0f, 0xd6)
enc_both(base.store.f64.any, r.fstDisp32, 0x66, 0x0f, 0xd6)
enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.fill.f32, r.ffiSib32, 0x66, 0x0f, 0x6e)
enc_both(base.regfill.f32, r.frfi32, 0x66, 0x0f, 0x6e)
enc_both(base.fill.f64, r.ffiSib32, 0xf3, 0x0f, 0x7e)
enc_both(base.regfill.f64, r.frfi32, 0xf3, 0x0f, 0x7e)
enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)
enc_both(base.regfill.f64, r.fregfill32, 0xf2, 0x0f, 0x10)
enc_both(base.spill.f32, r.fspSib32, 0x66, 0x0f, 0x7e)
enc_both(base.regspill.f32, r.frsp32, 0x66, 0x0f, 0x7e)
enc_both(base.spill.f64, r.fspSib32, 0x66, 0x0f, 0xd6)
enc_both(base.regspill.f64, r.frsp32, 0x66, 0x0f, 0xd6)
enc_both(base.spill.f32, r.fspillSib32, 0xf3, 0x0f, 0x11)
enc_both(base.regspill.f32, r.fregspill32, 0xf3, 0x0f, 0x11)
enc_both(base.spill.f64, r.fspillSib32, 0xf2, 0x0f, 0x11)
enc_both(base.regspill.f64, r.fregspill32, 0xf2, 0x0f, 0x11)
#
# Function addresses.
#
I32.enc(base.func_addr.i32, *r.fnaddr4(0xb8),
isap=Not(allones_funcaddrs))
I64.enc(base.func_addr.i64, *r.fnaddr8.rex(0xb8, w=1),
isap=And(Not(allones_funcaddrs), Not(is_pic)))
X86_32.enc(base.func_addr.i32, *r.fnaddr4(0xb8),
isap=Not(allones_funcaddrs))
X86_64.enc(base.func_addr.i64, *r.fnaddr8.rex(0xb8, w=1),
isap=And(Not(allones_funcaddrs), Not(is_pic)))
I32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8),
isap=allones_funcaddrs)
I64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1),
isap=And(allones_funcaddrs, Not(is_pic)))
X86_32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8),
isap=allones_funcaddrs)
X86_64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1),
isap=And(allones_funcaddrs, Not(is_pic)))
I64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1),
isap=is_pic)
X86_64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1),
isap=is_pic)
#
# Global addresses.
#
I32.enc(base.globalsym_addr.i32, *r.gvaddr4(0xb8))
I64.enc(base.globalsym_addr.i64, *r.gvaddr8.rex(0xb8, w=1),
isap=Not(is_pic))
X86_32.enc(base.globalsym_addr.i32, *r.gvaddr4(0xb8))
X86_64.enc(base.globalsym_addr.i64, *r.gvaddr8.rex(0xb8, w=1),
isap=Not(is_pic))
I64.enc(base.globalsym_addr.i64, *r.got_gvaddr8.rex(0x8b, w=1),
isap=is_pic)
X86_64.enc(base.globalsym_addr.i64, *r.got_gvaddr8.rex(0x8b, w=1),
isap=is_pic)
#
# Call/return
#
I32.enc(base.call, *r.call_id(0xe8))
I64.enc(base.call, *r.call_id(0xe8), isap=Not(is_pic))
I64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic)
X86_32.enc(base.call, *r.call_id(0xe8))
X86_64.enc(base.call, *r.call_id(0xe8), isap=Not(is_pic))
X86_64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic)
I32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
I64.enc(base.call_indirect.i64, *r.call_r.rex(0xff, rrr=2))
I64.enc(base.call_indirect.i64, *r.call_r(0xff, rrr=2))
X86_32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
X86_64.enc(base.call_indirect.i64, *r.call_r.rex(0xff, rrr=2))
X86_64.enc(base.call_indirect.i64, *r.call_r(0xff, rrr=2))
I32.enc(base.x_return, *r.ret(0xc3))
I64.enc(base.x_return, *r.ret(0xc3))
X86_32.enc(base.x_return, *r.ret(0xc3))
X86_64.enc(base.x_return, *r.ret(0xc3))
#
# Branches
@@ -341,10 +352,10 @@ enc_i32_i64(base.brnz, r.tjccd, 0x85)
# Branch on a b1 value in a register only looks at the low 8 bits. See also
# bint encodings below.
#
# Start with the worst-case encoding for I32 only. The register allocator can't
# handle a branch with an ABCD-constrained operand.
I32.enc(base.brz.b1, *r.t8jccd_long(0x84))
I32.enc(base.brnz.b1, *r.t8jccd_long(0x85))
# Start with the worst-case encoding for X86_32 only. The register allocator
# can't handle a branch with an ABCD-constrained operand.
X86_32.enc(base.brz.b1, *r.t8jccd_long(0x84))
X86_32.enc(base.brnz.b1, *r.t8jccd_long(0x85))
enc_both(base.brz.b1, r.t8jccb_abcd, 0x74)
enc_both(base.brz.b1, r.t8jccd_abcd, 0x84)
@@ -354,26 +365,28 @@ enc_both(base.brnz.b1, r.t8jccd_abcd, 0x85)
#
# Trap as ud2
#
I32.enc(base.trap, *r.trap(0x0f, 0x0b))
I64.enc(base.trap, *r.trap(0x0f, 0x0b))
X86_32.enc(base.trap, *r.trap(0x0f, 0x0b))
X86_64.enc(base.trap, *r.trap(0x0f, 0x0b))
# Using a standard EncRecipe, not the TailRecipe.
I32.enc(base.trapif, r.trapif, 0)
I64.enc(base.trapif, r.trapif, 0)
I32.enc(base.trapff, r.trapff, 0)
I64.enc(base.trapff, r.trapff, 0)
X86_32.enc(base.trapif, r.trapif, 0)
X86_64.enc(base.trapif, r.trapif, 0)
X86_32.enc(base.trapff, r.trapff, 0)
X86_64.enc(base.trapff, r.trapff, 0)
#
# Comparisons
#
enc_i32_i64(base.icmp, r.icscc, 0x39)
enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7)
enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7)
enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7)
enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7)
# TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
I32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))
I64.enc(base.ifcmp_sp.i64, *r.rcmp_sp.rex(0x39, w=1))
X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))
X86_64.enc(base.ifcmp_sp.i64, *r.rcmp_sp.rex(0x39, w=1))
#
# Convert flags to bool.
@@ -398,66 +411,68 @@ enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
#
# This assumes that b1 is represented as an 8-bit low register with the value 0
# or 1.
I32.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
I64.enc(base.bint.i64.b1, *r.urm.rex(0x0f, 0xb6)) # zext to i64 implicit.
I64.enc(base.bint.i64.b1, *r.urm_abcd(0x0f, 0xb6)) # zext to i64 implicit.
I64.enc(base.bint.i32.b1, *r.urm.rex(0x0f, 0xb6))
I64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
#
# Encode movzbq as movzbl, because it's equivalent and shorter.
X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
# Numerical conversions.
# Reducing an integer is a no-op.
I32.enc(base.ireduce.i8.i32, r.null, 0)
I32.enc(base.ireduce.i16.i32, r.null, 0)
I64.enc(base.ireduce.i8.i32, r.null, 0)
I64.enc(base.ireduce.i16.i32, r.null, 0)
I64.enc(base.ireduce.i8.i64, r.null, 0)
I64.enc(base.ireduce.i16.i64, r.null, 0)
I64.enc(base.ireduce.i32.i64, r.null, 0)
X86_32.enc(base.ireduce.i8.i32, r.null, 0)
X86_32.enc(base.ireduce.i16.i32, r.null, 0)
X86_64.enc(base.ireduce.i8.i32, r.null, 0)
X86_64.enc(base.ireduce.i16.i32, r.null, 0)
X86_64.enc(base.ireduce.i8.i64, r.null, 0)
X86_64.enc(base.ireduce.i16.i64, r.null, 0)
X86_64.enc(base.ireduce.i32.i64, r.null, 0)
# TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
# instructions for %al/%ax/%eax to %ax/%eax/%rax.
# movsbl
I32.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
I64.enc(base.sextend.i32.i8, *r.urm.rex(0x0f, 0xbe))
I64.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
X86_32.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
# movswl
I32.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
I64.enc(base.sextend.i32.i16, *r.urm.rex(0x0f, 0xbf))
I64.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
# movsbq
I64.enc(base.sextend.i64.i8, *r.urm.rex(0x0f, 0xbe, w=1))
X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))
# movswq
I64.enc(base.sextend.i64.i16, *r.urm.rex(0x0f, 0xbf, w=1))
X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))
# movslq
I64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1))
X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))
# movzbl
I32.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
I64.enc(base.uextend.i32.i8, *r.urm.rex(0x0f, 0xb6))
I64.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
X86_32.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
# movzwl
I32.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
I64.enc(base.uextend.i32.i16, *r.urm.rex(0x0f, 0xb7))
I64.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
# movzbq, encoded as movzbl because it's equivalent and shorter
I64.enc(base.uextend.i64.i8, *r.urm.rex(0x0f, 0xb6))
I64.enc(base.uextend.i64.i8, *r.urm(0x0f, 0xb6))
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags(0x0f, 0xb6))
# movzwq, encoded as movzwl because it's equivalent and shorter
I64.enc(base.uextend.i64.i16, *r.urm.rex(0x0f, 0xb7))
I64.enc(base.uextend.i64.i16, *r.urm(0x0f, 0xb7))
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))
# A 32-bit register copy clears the high 32 bits.
I64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
I64.enc(base.uextend.i64.i32, *r.umr(0x89))
X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))
#
@@ -469,8 +484,8 @@ enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
# movq
I64.enc(base.bitcast.f64.i64, *r.frurm.rex(0x66, 0x0f, 0x6e, w=1))
I64.enc(base.bitcast.i64.f64, *r.rfumr.rex(0x66, 0x0f, 0x7e, w=1))
X86_64.enc(base.bitcast.f64.i64, *r.frurm.rex(0x66, 0x0f, 0x6e, w=1))
X86_64.enc(base.bitcast.i64.f64, *r.rfumr.rex(0x66, 0x0f, 0x7e, w=1))
# movaps
enc_both(base.copy.f32, r.furm, 0x0f, 0x28)
@@ -492,11 +507,11 @@ enc_both(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
# cvttss2si
enc_both(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
I64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
X86_64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
# cvttsd2si
enc_both(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
I64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
X86_64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
# Exact square roots.
enc_both(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)

View File

@@ -5,9 +5,11 @@ from __future__ import absolute_import
from cdsl.isa import EncRecipe
from cdsl.predicates import IsSignedInt, IsEqual, Or
from cdsl.registers import RegClass
from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry, NullAry
from base.formats import Unary, UnaryImm, UnaryBool, Binary, BinaryImm
from base.formats import MultiAry, NullAry
from base.formats import Trap, Call, IndirectCall, Store, Load
from base.formats import IntCompare, FloatCompare, IntCond, FloatCond
from base.formats import IntCompare, IntCompareImm, FloatCompare
from base.formats import IntCond, FloatCond
from base.formats import IntSelect, IntCondTrap, FloatCondTrap
from base.formats import Jump, Branch, BranchInt, BranchFloat
from base.formats import Ternary, FuncAddr, UnaryGlobalVar
@@ -277,23 +279,27 @@ null = EncRecipe('null', Unary, size=0, ins=GPR, outs=0, emit='')
# XX opcode, no ModR/M.
trap = TailRecipe(
'trap', Trap, size=0, ins=(), outs=(),
emit='PUT_OP(bits, BASE_REX, sink);')
emit='''
sink.trap(code, func.srclocs[inst]);
PUT_OP(bits, BASE_REX, sink);
''')
# Macro: conditional jump over a ud2.
trapif = EncRecipe(
'trapif', IntCondTrap, size=4, ins=FLAG.eflags, outs=(),
'trapif', IntCondTrap, size=4, ins=FLAG.rflags, outs=(),
clobbers_flags=False,
emit='''
// Jump over a 2-byte ud2.
sink.put1(0x70 | (icc2opc(cond.inverse()) as u8));
sink.put1(2);
// ud2.
sink.trap(code, func.srclocs[inst]);
sink.put1(0x0f);
sink.put1(0x0b);
''')
trapff = EncRecipe(
'trapff', FloatCondTrap, size=4, ins=FLAG.eflags, outs=(),
'trapff', FloatCondTrap, size=4, ins=FLAG.rflags, outs=(),
clobbers_flags=False,
instp=floatccs(FloatCondTrap),
emit='''
@@ -301,6 +307,7 @@ trapff = EncRecipe(
sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8));
sink.put1(2);
// ud2.
sink.trap(code, func.srclocs[inst]);
sink.put1(0x0f);
sink.put1(0x0b);
''')
@@ -358,7 +365,7 @@ rfumr = TailRecipe(
''')
# XX /r, but for a unary operator with separate input/output register.
# RM form.
# RM form. Clobbers FLAGS.
urm = TailRecipe(
'urm', Unary, size=1, ins=GPR, outs=GPR,
emit='''
@@ -366,10 +373,19 @@ urm = TailRecipe(
modrm_rr(in_reg0, out_reg0, sink);
''')
# XX /r. Same as urm, but input limited to ABCD.
urm_abcd = TailRecipe(
'urm_abcd', Unary, size=1, ins=ABCD, outs=GPR,
when_prefixed=urm,
# XX /r. Same as urm, but doesn't clobber FLAGS.
urm_noflags = TailRecipe(
'urm_noflags', Unary, size=1, ins=GPR, outs=GPR,
clobbers_flags=False,
emit='''
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_rr(in_reg0, out_reg0, sink);
''')
# XX /r. Same as urm_noflags, but input limited to ABCD.
urm_noflags_abcd = TailRecipe(
'urm_noflags_abcd', Unary, size=1, ins=ABCD, outs=GPR,
when_prefixed=urm_noflags,
emit='''
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_rr(in_reg0, out_reg0, sink);
@@ -449,6 +465,7 @@ div = TailRecipe(
'div', Ternary, size=1,
ins=(GPR.rax, GPR.rdx, GPR), outs=(GPR.rax, GPR.rdx),
emit='''
sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]);
PUT_OP(bits, rex1(in_reg2), sink);
modrm_r_bits(in_reg2, bits, sink);
''')
@@ -506,6 +523,17 @@ puid = TailRecipe(
sink.put4(imm as u32);
''')
# XX+rd id unary with bool immediate. Note no recipe predicate.
puid_bool = TailRecipe(
'puid_bool', UnaryBool, size=4, ins=(), outs=GPR,
emit='''
// The destination register is encoded in the low bits of the opcode.
// No ModR/M.
PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
let imm: u32 = if imm.into() { 1 } else { 0 };
sink.put4(imm);
''')
# XX+rd iq unary with 64-bit immediate.
puiq = TailRecipe(
'puiq', UnaryImm, size=8, ins=(), outs=GPR,
@@ -666,6 +694,9 @@ st = TailRecipe(
instp=IsEqual(Store.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_rm(in_reg1, in_reg0, sink);
''')
@@ -678,6 +709,9 @@ st_abcd = TailRecipe(
when_prefixed=st,
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_rm(in_reg1, in_reg0, sink);
''')
@@ -688,6 +722,9 @@ fst = TailRecipe(
instp=IsEqual(Store.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_rm(in_reg1, in_reg0, sink);
''')
@@ -698,6 +735,9 @@ stDisp8 = TailRecipe(
instp=IsSignedInt(Store.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_disp8(in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
@@ -709,6 +749,9 @@ stDisp8_abcd = TailRecipe(
when_prefixed=stDisp8,
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_disp8(in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
@@ -719,6 +762,9 @@ fstDisp8 = TailRecipe(
instp=IsSignedInt(Store.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_disp8(in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
@@ -730,6 +776,9 @@ stDisp32 = TailRecipe(
'stDisp32', Store, size=5, ins=(GPR, GPR_DEREF_SAFE), outs=(),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_disp32(in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
@@ -740,6 +789,9 @@ stDisp32_abcd = TailRecipe(
when_prefixed=stDisp32,
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_disp32(in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
@@ -749,6 +801,9 @@ fstDisp32 = TailRecipe(
'fstDisp32', Store, size=5, ins=(FPR, GPR_DEREF_SAFE), outs=(),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_disp32(in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
@@ -756,8 +811,8 @@ fstDisp32 = TailRecipe(
''')
# Unary spill with SIB and 32-bit displacement.
spSib32 = TailRecipe(
'spSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
spillSib32 = TailRecipe(
'spillSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
clobbers_flags=False,
emit='''
let base = stk_base(out_stk0.base);
@@ -766,8 +821,10 @@ spSib32 = TailRecipe(
sib_noindex(base, sink);
sink.put4(out_stk0.offset as u32);
''')
fspSib32 = TailRecipe(
'fspSib32', Unary, size=6, ins=FPR, outs=StackFPR32,
# Like spillSib32, but targeting an FPR rather than a GPR.
fspillSib32 = TailRecipe(
'fspillSib32', Unary, size=6, ins=FPR, outs=StackFPR32,
clobbers_flags=False,
emit='''
let base = stk_base(out_stk0.base);
@@ -778,8 +835,8 @@ fspSib32 = TailRecipe(
''')
# Regspill using RSP-relative addressing.
rsp32 = TailRecipe(
'rsp32', RegSpill, size=6, ins=GPR, outs=(),
regspill32 = TailRecipe(
'regspill32', RegSpill, size=6, ins=GPR, outs=(),
clobbers_flags=False,
emit='''
let dst = StackRef::sp(dst, &func.stack_slots);
@@ -789,8 +846,10 @@ rsp32 = TailRecipe(
sib_noindex(base, sink);
sink.put4(dst.offset as u32);
''')
frsp32 = TailRecipe(
'frsp32', RegSpill, size=6, ins=FPR, outs=(),
# Like regspill32, but targeting an FPR rather than a GPR.
fregspill32 = TailRecipe(
'fregspill32', RegSpill, size=6, ins=FPR, outs=(),
clobbers_flags=False,
emit='''
let dst = StackRef::sp(dst, &func.stack_slots);
@@ -811,6 +870,9 @@ ld = TailRecipe(
instp=IsEqual(Load.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_rm(in_reg0, out_reg0, sink);
''')
@@ -821,6 +883,9 @@ fld = TailRecipe(
instp=IsEqual(Load.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_rm(in_reg0, out_reg0, sink);
''')
@@ -831,6 +896,9 @@ ldDisp8 = TailRecipe(
instp=IsSignedInt(Load.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_disp8(in_reg0, out_reg0, sink);
let offset: i32 = offset.into();
@@ -843,6 +911,9 @@ fldDisp8 = TailRecipe(
instp=IsSignedInt(Load.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_disp8(in_reg0, out_reg0, sink);
let offset: i32 = offset.into();
@@ -855,6 +926,9 @@ ldDisp32 = TailRecipe(
instp=IsSignedInt(Load.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_disp32(in_reg0, out_reg0, sink);
let offset: i32 = offset.into();
@@ -867,6 +941,9 @@ fldDisp32 = TailRecipe(
instp=IsSignedInt(Load.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_disp32(in_reg0, out_reg0, sink);
let offset: i32 = offset.into();
@@ -874,8 +951,8 @@ fldDisp32 = TailRecipe(
''')
# Unary fill with SIB and 32-bit displacement.
fiSib32 = TailRecipe(
'fiSib32', Unary, size=6, ins=StackGPR32, outs=GPR,
fillSib32 = TailRecipe(
'fillSib32', Unary, size=6, ins=StackGPR32, outs=GPR,
clobbers_flags=False,
emit='''
let base = stk_base(in_stk0.base);
@@ -884,8 +961,10 @@ fiSib32 = TailRecipe(
sib_noindex(base, sink);
sink.put4(in_stk0.offset as u32);
''')
ffiSib32 = TailRecipe(
'ffiSib32', Unary, size=6, ins=StackFPR32, outs=FPR,
# Like fillSib32, but targeting an FPR rather than a GPR.
ffillSib32 = TailRecipe(
'ffillSib32', Unary, size=6, ins=StackFPR32, outs=FPR,
clobbers_flags=False,
emit='''
let base = stk_base(in_stk0.base);
@@ -896,8 +975,8 @@ ffiSib32 = TailRecipe(
''')
# Regfill with RSP-relative 32-bit displacement.
rfi32 = TailRecipe(
'rfi32', RegFill, size=6, ins=StackGPR32, outs=(),
regfill32 = TailRecipe(
'regfill32', RegFill, size=6, ins=StackGPR32, outs=(),
clobbers_flags=False,
emit='''
let src = StackRef::sp(src, &func.stack_slots);
@@ -907,8 +986,10 @@ rfi32 = TailRecipe(
sib_noindex(base, sink);
sink.put4(src.offset as u32);
''')
frfi32 = TailRecipe(
'frfi32', RegFill, size=6, ins=StackFPR32, outs=(),
# Like regfill32, but targeting an FPR rather than a GPR.
fregfill32 = TailRecipe(
'fregfill32', RegFill, size=6, ins=StackFPR32, outs=(),
clobbers_flags=False,
emit='''
let src = StackRef::sp(src, &func.stack_slots);
@@ -977,7 +1058,7 @@ jmpd = TailRecipe(
''')
brib = TailRecipe(
'brib', BranchInt, size=1, ins=FLAG.eflags, outs=(),
'brib', BranchInt, size=1, ins=FLAG.rflags, outs=(),
branch_range=8,
clobbers_flags=False,
emit='''
@@ -986,7 +1067,7 @@ brib = TailRecipe(
''')
brid = TailRecipe(
'brid', BranchInt, size=4, ins=FLAG.eflags, outs=(),
'brid', BranchInt, size=4, ins=FLAG.rflags, outs=(),
branch_range=32,
clobbers_flags=False,
emit='''
@@ -995,7 +1076,7 @@ brid = TailRecipe(
''')
brfb = TailRecipe(
'brfb', BranchFloat, size=1, ins=FLAG.eflags, outs=(),
'brfb', BranchFloat, size=1, ins=FLAG.rflags, outs=(),
branch_range=8,
clobbers_flags=False,
instp=floatccs(BranchFloat),
@@ -1005,7 +1086,7 @@ brfb = TailRecipe(
''')
brfd = TailRecipe(
'brfd', BranchFloat, size=4, ins=FLAG.eflags, outs=(),
'brfd', BranchFloat, size=4, ins=FLAG.rflags, outs=(),
branch_range=32,
clobbers_flags=False,
instp=floatccs(BranchFloat),
@@ -1025,7 +1106,7 @@ brfd = TailRecipe(
#
seti = TailRecipe(
'seti', IntCond, size=1, ins=FLAG.eflags, outs=GPR,
'seti', IntCond, size=1, ins=FLAG.rflags, outs=GPR,
requires_prefix=True,
clobbers_flags=False,
emit='''
@@ -1033,7 +1114,7 @@ seti = TailRecipe(
modrm_r_bits(out_reg0, bits, sink);
''')
seti_abcd = TailRecipe(
'seti_abcd', IntCond, size=1, ins=FLAG.eflags, outs=ABCD,
'seti_abcd', IntCond, size=1, ins=FLAG.rflags, outs=ABCD,
when_prefixed=seti,
clobbers_flags=False,
emit='''
@@ -1042,7 +1123,7 @@ seti_abcd = TailRecipe(
''')
setf = TailRecipe(
'setf', FloatCond, size=1, ins=FLAG.eflags, outs=GPR,
'setf', FloatCond, size=1, ins=FLAG.rflags, outs=GPR,
requires_prefix=True,
clobbers_flags=False,
emit='''
@@ -1050,7 +1131,7 @@ setf = TailRecipe(
modrm_r_bits(out_reg0, bits, sink);
''')
setf_abcd = TailRecipe(
'setf_abcd', FloatCond, size=1, ins=FLAG.eflags, outs=ABCD,
'setf_abcd', FloatCond, size=1, ins=FLAG.rflags, outs=ABCD,
when_prefixed=setf,
clobbers_flags=False,
emit='''
@@ -1064,7 +1145,7 @@ setf_abcd = TailRecipe(
# 1 byte, modrm(r,r), is after the opcode
#
cmov = TailRecipe(
'cmov', IntSelect, size=1, ins=(FLAG.eflags, GPR, GPR), outs=2,
'cmov', IntSelect, size=1, ins=(FLAG.rflags, GPR, GPR), outs=2,
requires_prefix=False,
clobbers_flags=False,
emit='''
@@ -1076,7 +1157,7 @@ cmov = TailRecipe(
# Bit scan forwards and reverse
#
bsf_and_bsr = TailRecipe(
'bsf_and_bsr', Unary, size=1, ins=GPR, outs=(GPR, FLAG.eflags),
'bsf_and_bsr', Unary, size=1, ins=GPR, outs=(GPR, FLAG.rflags),
requires_prefix=False,
clobbers_flags=True,
emit='''
@@ -1090,7 +1171,7 @@ bsf_and_bsr = TailRecipe(
# XX /r, MR form. Compare two GPR registers and set flags.
rcmp = TailRecipe(
'rcmp', Binary, size=1, ins=(GPR, GPR), outs=FLAG.eflags,
'rcmp', Binary, size=1, ins=(GPR, GPR), outs=FLAG.rflags,
emit='''
PUT_OP(bits, rex2(in_reg0, in_reg1), sink);
modrm_rr(in_reg0, in_reg1, sink);
@@ -1098,7 +1179,7 @@ rcmp = TailRecipe(
# XX /r, RM form. Compare two FPR registers and set flags.
fcmp = TailRecipe(
'fcmp', Binary, size=1, ins=(FPR, FPR), outs=FLAG.eflags,
'fcmp', Binary, size=1, ins=(FPR, FPR), outs=FLAG.rflags,
emit='''
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_rr(in_reg1, in_reg0, sink);
@@ -1106,7 +1187,7 @@ fcmp = TailRecipe(
# XX /n, MI form with imm8.
rcmpib = TailRecipe(
'rcmpib', BinaryImm, size=2, ins=GPR, outs=FLAG.eflags,
'rcmpib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags,
instp=IsSignedInt(BinaryImm.imm, 8),
emit='''
PUT_OP(bits, rex1(in_reg0), sink);
@@ -1117,7 +1198,7 @@ rcmpib = TailRecipe(
# XX /n, MI form with imm32.
rcmpid = TailRecipe(
'rcmpid', BinaryImm, size=5, ins=GPR, outs=FLAG.eflags,
'rcmpid', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags,
instp=IsSignedInt(BinaryImm.imm, 32),
emit='''
PUT_OP(bits, rex1(in_reg0), sink);
@@ -1128,7 +1209,7 @@ rcmpid = TailRecipe(
# Same as rcmp, but second operand is the stack pointer.
rcmp_sp = TailRecipe(
'rcmp_sp', Unary, size=1, ins=GPR, outs=FLAG.eflags,
'rcmp_sp', Unary, size=1, ins=GPR, outs=FLAG.rflags,
emit='''
PUT_OP(bits, rex2(in_reg0, RU::rsp.into()), sink);
modrm_rr(in_reg0, RU::rsp.into(), sink);
@@ -1289,12 +1370,67 @@ icscc = TailRecipe(
modrm_rr(out_reg0, 0, sink);
''')
icsccib = TailRecipe(
'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
instp=IsSignedInt(IntCompareImm.imm, 8),
emit='''
// Comparison instruction.
PUT_OP(bits, rex1(in_reg0), sink);
modrm_r_bits(in_reg0, bits, sink);
let imm: i64 = imm.into();
sink.put1(imm as u8);
// `setCC` instruction, no REX.
use ir::condcodes::IntCC::*;
let setcc = match cond {
Equal => 0x94,
NotEqual => 0x95,
SignedLessThan => 0x9c,
SignedGreaterThanOrEqual => 0x9d,
SignedGreaterThan => 0x9f,
SignedLessThanOrEqual => 0x9e,
UnsignedLessThan => 0x92,
UnsignedGreaterThanOrEqual => 0x93,
UnsignedGreaterThan => 0x97,
UnsignedLessThanOrEqual => 0x96,
};
sink.put1(0x0f);
sink.put1(setcc);
modrm_rr(out_reg0, 0, sink);
''')
icsccid = TailRecipe(
'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
instp=IsSignedInt(IntCompareImm.imm, 32),
emit='''
// Comparison instruction.
PUT_OP(bits, rex1(in_reg0), sink);
modrm_r_bits(in_reg0, bits, sink);
let imm: i64 = imm.into();
sink.put4(imm as u32);
// `setCC` instruction, no REX.
use ir::condcodes::IntCC::*;
let setcc = match cond {
Equal => 0x94,
NotEqual => 0x95,
SignedLessThan => 0x9c,
SignedGreaterThanOrEqual => 0x9d,
SignedGreaterThan => 0x9f,
SignedLessThanOrEqual => 0x9e,
UnsignedLessThan => 0x92,
UnsignedGreaterThanOrEqual => 0x93,
UnsignedGreaterThan => 0x97,
UnsignedLessThanOrEqual => 0x96,
};
sink.put1(0x0f);
sink.put1(setcc);
modrm_rr(out_reg0, 0, sink);
''')
# Make a FloatCompare instruction predicate with the supported condition codes.
# Same thing for floating point.
#
# The ucomiss/ucomisd instructions set the EFLAGS bits CF/PF/CF like this:
# The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this:
#
# ZPC OSA
# UN 111 000

View File

@@ -43,7 +43,7 @@ FlagRegs = RegBank(
'Flag registers',
units=1,
pressure_tracking=False,
names=['eflags'])
names=['rflags'])
GPR = RegClass(IntRegs)
# Certain types of deref encodings cannot be used with all registers.

View File

@@ -8,9 +8,10 @@ source code.
from __future__ import absolute_import
import sys
import os
from collections import OrderedDict
try:
from typing import Any, List # noqa
from typing import Any, List, Set, Tuple # noqa
except ImportError:
pass
@@ -146,6 +147,52 @@ class Formatter(object):
for l in parse_multiline(s):
self.line('/// ' + l if l else '///')
def match(self, m):
# type: (Match) -> None
"""
Add a match expression.
Example:
>>> f = Formatter()
>>> m = Match('x')
>>> m.arm('Orange', ['a', 'b'], 'some body')
>>> m.arm('Yellow', ['a', 'b'], 'some body')
>>> m.arm('Green', ['a', 'b'], 'different body')
>>> m.arm('Blue', ['x', 'y'], 'some body')
>>> f.match(m)
>>> f.writelines()
match x {
Orange { a, b } |
Yellow { a, b } => {
some body
}
Green { a, b } => {
different body
}
Blue { x, y } => {
some body
}
}
"""
with self.indented('match {} {{'.format(m.expr), '}'):
for (fields, body), names in m.arms.items():
with self.indented('', '}'):
names_left = len(names)
for name in names.keys():
fields_str = ', '.join(fields)
if len(fields) != 0:
fields_str = '{{ {} }} '.format(fields_str)
names_left -= 1
if names_left > 0:
suffix = '|'
else:
suffix = '=> {'
self.outdented_line(name + ' ' + fields_str + suffix)
if names_left == 0:
self.multi_line(body)
def _indent(s):
# type: (str) -> int
@@ -195,3 +242,36 @@ def parse_multiline(s):
while trimmed and not trimmed[0]:
trimmed.pop(0)
return trimmed
class Match(object):
"""
Match formatting class.
Match objects collect all the information needed to emit a Rust `match`
expression, automatically deduplicating overlapping identical arms.
Example:
>>> m = Match('x')
>>> m.arm('Orange', ['a', 'b'], 'some body')
>>> m.arm('Yellow', ['a', 'b'], 'some body')
>>> m.arm('Green', ['a', 'b'], 'different body')
>>> m.arm('Blue', ['x', 'y'], 'some body')
>>> assert(len(m.arms) == 3)
Note that this class is ignorant of Rust types, and considers two fields
with the same name to be equivalent.
"""
def __init__(self, expr):
# type: (str) -> None
self.expr = expr
self.arms = OrderedDict() # type: OrderedDict[Tuple[Tuple[str, ...], str], OrderedDict[str, None]] # noqa
def arm(self, name, fields, body):
# type: (str, List[str], str) -> None
key = (tuple(fields), body)
if key not in self.arms:
self.arms[key] = OrderedDict()
self.arms[key][name] = None

View File

@@ -148,9 +148,9 @@ class TestRuntimeChecks(TestCase):
self.v5 << vselect(self.v1, self.v3, self.v4),
)
x = XForm(r, r)
tv2_exp = 'Some({}).map(|t: Type| -> t.as_bool())'\
tv2_exp = 'Some({}).map(|t: ir::Type| t.as_bool())'\
.format(self.v2.get_typevar().name)
tv3_exp = 'Some({}).map(|t: Type| -> t.as_bool())'\
tv3_exp = 'Some({}).map(|t: ir::Type| t.as_bool())'\
.format(self.v3.get_typevar().name)
self.check_yo_check(

View File

@@ -3,7 +3,7 @@
//! This module provides functions and data structures that are useful for implementing the
//! `TargetIsa::legalize_signature()` method.
use ir::{ArgumentLoc, AbiParam, ArgumentExtension, Type};
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
use std::cmp::Ordering;
use std::vec::Vec;
@@ -186,8 +186,8 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
#[cfg(test)]
mod tests {
use super::*;
use ir::types;
use ir::AbiParam;
use ir::types;
#[test]
fn legalize() {

View File

@@ -1,8 +1,8 @@
//! Forest of maps.
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, INNER_SIZE};
use packed_option::PackedOption;
use std::marker::PhantomData;
use super::{INNER_SIZE, Comparator, Forest, NodePool, Node, NodeData, Path};
/// Tag type defining forest types for a map.
struct MapTypes<K, V, C>(PhantomData<(K, V, C)>);
@@ -424,10 +424,10 @@ where
#[cfg(test)]
mod test {
use super::super::NodeData;
use super::*;
use std::mem;
use std::vec::Vec;
use super::*;
use super::super::NodeData;
#[test]
fn node_size() {

View File

@@ -22,8 +22,8 @@ mod path;
mod pool;
mod set;
pub use self::map::{MapForest, Map, MapCursor, MapIter};
pub use self::set::{SetForest, Set, SetCursor, SetIter};
pub use self::map::{Map, MapCursor, MapForest, MapIter};
pub use self::set::{Set, SetCursor, SetForest, SetIter};
use self::node::NodeData;
use self::path::Path;

View File

@@ -1,8 +1,8 @@
//! B+-tree nodes.
use super::{slice_insert, slice_shift, Forest, Node, SetValue, INNER_SIZE};
use std::borrow::{Borrow, BorrowMut};
use std::fmt;
use super::{Forest, Node, INNER_SIZE, SetValue, slice_insert, slice_shift};
/// B+-tree node.
///
@@ -579,9 +579,9 @@ where
#[cfg(test)]
mod test {
use super::*;
use std::mem;
use std::string::ToString;
use super::*;
// Forest impl for a set implementation.
struct TF();

View File

@@ -1,9 +1,9 @@
//! A path from the root of a B+-tree to a leaf node.
use super::node::Removed;
use super::{slice_insert, slice_shift, Comparator, Forest, Node, NodeData, NodePool, MAX_PATH};
use std::borrow::Borrow;
use std::marker::PhantomData;
use super::{Forest, Node, NodeData, NodePool, MAX_PATH, Comparator, slice_insert, slice_shift};
use super::node::Removed;
#[cfg(test)]
use std::fmt;
@@ -55,8 +55,8 @@ impl<F: Forest> Path<F> {
for level in 0.. {
self.size = level + 1;
self.node[level] = node;
match &pool[node] {
&NodeData::Inner { size, keys, tree } => {
match pool[node] {
NodeData::Inner { size, keys, tree } => {
// Invariant: `tree[i]` contains keys smaller than
// `keys[i]`, greater or equal to `keys[i-1]`.
let i = match comp.search(key, &keys[0..size.into()]) {
@@ -68,7 +68,7 @@ impl<F: Forest> Path<F> {
self.entry[level] = i as u8;
node = tree[i];
}
&NodeData::Leaf { size, keys, vals } => {
NodeData::Leaf { size, keys, vals } => {
// For a leaf we want either the found key or an insert position.
return match comp.search(key, &keys.borrow()[0..size.into()]) {
Ok(i) => {
@@ -81,7 +81,7 @@ impl<F: Forest> Path<F> {
}
};
}
&NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
unreachable!();
@@ -94,10 +94,10 @@ impl<F: Forest> Path<F> {
self.size = level + 1;
self.node[level] = node;
self.entry[level] = 0;
match &pool[node] {
&NodeData::Inner { tree, .. } => node = tree[0],
&NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]),
&NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
match pool[node] {
NodeData::Inner { tree, .. } => node = tree[0],
NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]),
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
unreachable!();
@@ -205,17 +205,17 @@ impl<F: Forest> Path<F> {
let mut node = root;
for l in level.. {
self.node[l] = node;
match &pool[node] {
&NodeData::Inner { size, ref tree, .. } => {
match pool[node] {
NodeData::Inner { size, ref tree, .. } => {
self.entry[l] = size;
node = tree[usize::from(size)];
}
&NodeData::Leaf { size, .. } => {
NodeData::Leaf { size, .. } => {
self.entry[l] = size - 1;
self.size = l + 1;
break;
}
&NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
}
}
node
@@ -405,8 +405,8 @@ impl<F: Forest> Path<F> {
let crit_key = pool[self.leaf_node()].leaf_crit_key();
let crit_node = self.node[crit_level];
match &mut pool[crit_node] {
&mut NodeData::Inner { size, ref mut keys, .. } => {
match pool[crit_node] {
NodeData::Inner { size, ref mut keys, .. } => {
debug_assert!(crit_kidx < size);
keys[usize::from(crit_kidx)] = crit_key;
}
@@ -414,7 +414,6 @@ impl<F: Forest> Path<F> {
}
}
/// Given that the current leaf node is in an unhealthy (underflowed or even empty) status,
/// balance it with sibling nodes.
///
@@ -437,7 +436,7 @@ impl<F: Forest> Path<F> {
// Discard the root node if it has shrunk to a single sub-tree.
let mut ns = 0;
while let &NodeData::Inner { size: 0, ref tree, .. } = &pool[self.node[ns]] {
while let NodeData::Inner { size: 0, ref tree, .. } = pool[self.node[ns]] {
ns += 1;
self.node[ns] = tree[0];
}
@@ -529,12 +528,10 @@ impl<F: Forest> Path<F> {
// current entry[level] was one off the end of the node, it will now point at a proper
// entry.
debug_assert!(usize::from(self.entry[level]) < pool[self.node[level]].entries());
} else {
} else if usize::from(self.entry[level]) >= pool[self.node[level]].entries() {
// There's no right sibling at this level, so the node can't be rebalanced.
// Check if we are in an off-the-end position.
if usize::from(self.entry[level]) >= pool[self.node[level]].entries() {
self.size = 0;
}
self.size = 0;
}
}
@@ -581,8 +578,8 @@ impl<F: Forest> Path<F> {
///
/// Returns `None` if the current node is a right-most node so no right sibling exists.
fn right_sibling_branch_level(&self, level: usize, pool: &NodePool<F>) -> Option<usize> {
(0..level).rposition(|l| match &pool[self.node[l]] {
&NodeData::Inner { size, .. } => self.entry[l] < size,
(0..level).rposition(|l| match pool[self.node[l]] {
NodeData::Inner { size, .. } => self.entry[l] < size,
_ => panic!("Expected inner node"),
})
}
@@ -622,8 +619,8 @@ impl<F: Forest> Path<F> {
let bl = self.right_sibling_branch_level(level, pool).expect(
"No right sibling exists",
);
match &mut pool[self.node[bl]] {
&mut NodeData::Inner { ref mut keys, .. } => {
match pool[self.node[bl]] {
NodeData::Inner { ref mut keys, .. } => {
keys[usize::from(self.entry[bl])] = crit_key;
}
_ => panic!("Expected inner node"),
@@ -647,8 +644,8 @@ impl<F: Forest> Path<F> {
/// Check the internal consistency of this path.
pub fn verify(&self, pool: &NodePool<F>) {
for level in 0..self.size {
match &pool[self.node[level]] {
&NodeData::Inner { size, tree, .. } => {
match pool[self.node[level]] {
NodeData::Inner { size, tree, .. } => {
assert!(
level < self.size - 1,
"Expected leaf node at level {}",
@@ -668,7 +665,7 @@ impl<F: Forest> Path<F> {
level
);
}
&NodeData::Leaf { size, .. } => {
NodeData::Leaf { size, .. } => {
assert_eq!(level, self.size - 1, "Expected inner node");
assert!(
self.entry[level] <= size,
@@ -677,7 +674,7 @@ impl<F: Forest> Path<F> {
size,
);
}
&NodeData::Free { .. } => {
NodeData::Free { .. } => {
panic!("Free {} in path", self.node[level]);
}
}
@@ -702,9 +699,9 @@ impl<F: Forest> fmt::Display for Path<F> {
#[cfg(test)]
mod test {
use std::cmp::Ordering;
use super::super::{Forest, NodeData, NodePool};
use super::*;
use super::super::{Forest, NodePool, NodeData};
use std::cmp::Ordering;
struct TC();

View File

@@ -1,8 +1,8 @@
//! B+-tree node pool.
use super::{Forest, Node, NodeData};
use entity::PrimaryMap;
use std::ops::{Index, IndexMut};
use super::{Forest, Node, NodeData};
/// A pool of nodes, including a free list.
pub(super) struct NodePool<F: Forest> {
@@ -57,6 +57,7 @@ impl<F: Forest> NodePool<F> {
pub fn free_tree(&mut self, node: Node) {
if let NodeData::Inner { size, tree, .. } = self[node] {
// Note that we have to capture `tree` by value to avoid borrow checker trouble.
#[cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
for i in 0..usize::from(size + 1) {
// Recursively free sub-trees. This recursion can never be deeper than `MAX_PATH`,
// and since most trees have less than a handful of nodes, it is worthwhile to
@@ -76,11 +77,11 @@ impl<F: Forest> NodePool<F> {
NodeData<F>: ::std::fmt::Display,
F::Key: ::std::fmt::Display,
{
use super::Comparator;
use entity::SparseSet;
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::vec::Vec;
use super::Comparator;
use entity::SparseSet;
// The root node can't be an inner node with just a single sub-tree. It should have been
// pruned.
@@ -105,8 +106,8 @@ impl<F: Forest> NodePool<F> {
);
let mut lower = lkey;
match &self[node] {
&NodeData::Inner { size, keys, tree } => {
match self[node] {
NodeData::Inner { size, keys, tree } => {
let size = size as usize;
let capacity = tree.len();
let keys = &keys[0..size];
@@ -148,7 +149,7 @@ impl<F: Forest> NodePool<F> {
lower = upper;
}
}
&NodeData::Leaf { size, keys, .. } => {
NodeData::Leaf { size, keys, .. } => {
let size = size as usize;
let capacity = keys.borrow().len();
let keys = &keys.borrow()[0..size];
@@ -191,7 +192,7 @@ impl<F: Forest> NodePool<F> {
lower = upper;
}
}
&NodeData::Free { .. } => panic!("Free {} reached", node),
NodeData::Free { .. } => panic!("Free {} reached", node),
}
}
}

View File

@@ -1,8 +1,8 @@
//! Forest of sets.
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, SetValue, INNER_SIZE};
use packed_option::PackedOption;
use std::marker::PhantomData;
use super::{INNER_SIZE, Comparator, Forest, NodePool, Node, NodeData, Path, SetValue};
/// Tag type defining forest types for a set.
struct SetTypes<K, C>(PhantomData<(K, C)>);
@@ -351,10 +351,10 @@ where
#[cfg(test)]
mod test {
use super::super::NodeData;
use super::*;
use std::mem;
use std::vec::Vec;
use super::*;
use super::super::NodeData;
#[test]
fn node_size() {

View File

@@ -14,13 +14,13 @@
//! relocations to a `RelocSink` trait object. Relocations are less frequent than the
//! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe.
use ir::{ExternalName, JumpTable};
use super::{CodeSink, CodeOffset, Reloc, Addend};
use super::{Addend, CodeOffset, CodeSink, Reloc};
use ir::{ExternalName, JumpTable, SourceLoc, TrapCode};
use std::ptr::write_unaligned;
/// A `CodeSink` that writes binary machine code directly into memory.
///
/// A `MemoryCodeSink` object should be used when emitting a Cretonne IL function into executable
/// A `MemoryCodeSink` object should be used when emitting a Cretonne IR function into executable
/// memory. It writes machine code directly to a raw pointer without any bounds checking, so make
/// sure to allocate enough memory for the whole function. The number of bytes required is returned
/// by the `Context::compile()` function.
@@ -33,15 +33,21 @@ pub struct MemoryCodeSink<'a> {
data: *mut u8,
offset: isize,
relocs: &'a mut RelocSink,
traps: &'a mut TrapSink,
}
impl<'a> MemoryCodeSink<'a> {
/// Create a new memory code sink that writes a function to the memory pointed to by `data`.
pub fn new(data: *mut u8, relocs: &mut RelocSink) -> MemoryCodeSink {
pub fn new<'sink>(
data: *mut u8,
relocs: &'sink mut RelocSink,
traps: &'sink mut TrapSink,
) -> MemoryCodeSink<'sink> {
MemoryCodeSink {
data,
offset: 0,
relocs,
traps,
}
}
}
@@ -58,6 +64,12 @@ pub trait RelocSink {
fn reloc_jt(&mut self, CodeOffset, Reloc, JumpTable);
}
/// A trait for receiving trap codes and offsets.
pub trait TrapSink {
/// Add trap information for a specific offset.
fn trap(&mut self, CodeOffset, SourceLoc, TrapCode);
}
impl<'a> CodeSink for MemoryCodeSink<'a> {
fn offset(&self) -> CodeOffset {
self.offset as CodeOffset
@@ -105,4 +117,9 @@ impl<'a> CodeSink for MemoryCodeSink<'a> {
let ofs = self.offset();
self.relocs.reloc_jt(ofs, rel, jt);
}
fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) {
let ofs = self.offset();
self.traps.trap(ofs, srcloc, code);
}
}

View File

@@ -3,14 +3,14 @@
//! The `binemit` module contains code for translating Cretonne's intermediate representation into
//! binary machine code.
mod relaxation;
mod memorysink;
mod relaxation;
pub use regalloc::RegDiversions;
pub use self::memorysink::{MemoryCodeSink, RelocSink, TrapSink};
pub use self::relaxation::relax_branches;
pub use self::memorysink::{MemoryCodeSink, RelocSink};
pub use regalloc::RegDiversions;
use ir::{ExternalName, JumpTable, Function, Inst};
use ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
use std::fmt;
/// Offset in bytes from the beginning of the function.
@@ -86,10 +86,13 @@ pub trait CodeSink {
/// Add a relocation referencing a jump table.
fn reloc_jt(&mut self, Reloc, JumpTable);
/// Add trap information for the current offset.
fn trap(&mut self, TrapCode, SourceLoc);
}
/// Report a bad encoding error.
#[inline(never)]
#[cold]
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
panic!(
"Bad encoding {} for {}",

View File

@@ -30,7 +30,7 @@
use binemit::CodeOffset;
use cursor::{Cursor, FuncCursor};
use ir::{Function, InstructionData, Opcode};
use isa::{TargetIsa, EncInfo};
use isa::{EncInfo, TargetIsa};
use iterators::IteratorExtras;
use result::CtonError;
@@ -76,14 +76,13 @@ pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> Result<CodeOffset
if let Some(range) = encinfo.branch_range(enc) {
if let Some(dest) = cur.func.dfg[inst].branch_destination() {
let dest_offset = cur.func.offsets[dest];
if !range.contains(offset, dest_offset) {
// This is an out-of-range branch.
// Relax it unless the destination offset has not been computed yet.
if dest_offset != 0 || Some(dest) == cur.func.layout.entry_block() {
offset +=
relax_branch(&mut cur, offset, dest_offset, &encinfo, isa);
continue;
}
// This could be an out-of-range branch.
// Relax it unless the destination offset has not been computed yet.
if !range.contains(offset, dest_offset) &&
(dest_offset != 0 || Some(dest) == cur.func.layout.entry_block())
{
offset += relax_branch(&mut cur, offset, dest_offset, &encinfo, isa);
continue;
}
}
}

View File

@@ -5,9 +5,9 @@
//!
//! If you would like to add support for larger bitsets in the future, you need to change the trait
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.
use std::convert::{From, Into};
use std::mem::size_of;
use std::ops::{Shl, BitOr, Sub, Add};
use std::convert::{Into, From};
use std::ops::{Add, BitOr, Shl, Sub};
/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]

View File

@@ -0,0 +1,76 @@
//! The `CFGPrinter` utility.
use std::fmt::{Display, Formatter, Result, Write};
use flowgraph::ControlFlowGraph;
use ir::Function;
use ir::instructions::BranchInfo;
/// A utility for pretty-printing the CFG of a `Function`.
pub struct CFGPrinter<'a> {
    /// The function whose CFG is being rendered.
    func: &'a Function,
    /// Control flow graph computed from `func` when the printer is created.
    cfg: ControlFlowGraph,
}
/// A utility for pretty-printing the CFG of a `Function`.
impl<'a> CFGPrinter<'a> {
    /// Create a new CFGPrinter.
    pub fn new(func: &'a Function) -> CFGPrinter<'a> {
        CFGPrinter {
            func,
            // Compute the CFG once up front; it is reused by `cfg_connections`.
            cfg: ControlFlowGraph::with_function(func),
        }
    }

    /// Write the CFG for this function to `w`.
    ///
    /// Emits a complete Graphviz `digraph`: a header, one record-shaped node
    /// per EBB, and the predecessor -> successor edges, then closes the graph.
    pub fn write(&self, w: &mut Write) -> Result {
        self.header(w)?;
        self.ebb_nodes(w)?;
        self.cfg_connections(w)?;
        writeln!(w, "}}")
    }

    // Open the `digraph` and pin the entry block at the minimum rank so it is
    // drawn at the top.
    fn header(&self, w: &mut Write) -> Result {
        writeln!(w, "digraph \"{}\" {{", self.func.name)?;
        if let Some(entry) = self.func.layout.entry_block() {
            writeln!(w, "    {{rank=min; {}}}", entry)?;
        }
        Ok(())
    }

    // Emit one record-shaped node per EBB. Each record starts with the EBB
    // name and contains a `<inst>` port per branch instruction so edges can
    // attach to the exact branch that creates them.
    fn ebb_nodes(&self, w: &mut Write) -> Result {
        for ebb in &self.func.layout {
            write!(w, "    {} [shape=record, label=\"{{{}", ebb, ebb)?;
            // Add all outgoing branch instructions to the label.
            for inst in self.func.layout.ebb_insts(ebb) {
                let idata = &self.func.dfg[inst];
                match idata.analyze_branch(&self.func.dfg.value_lists) {
                    BranchInfo::SingleDest(dest, _) => {
                        write!(w, " | <{}>{} {}", inst, idata.opcode(), dest)?
                    }
                    BranchInfo::Table(table) => {
                        write!(w, " | <{}>{} {}", inst, idata.opcode(), table)?
                    }
                    BranchInfo::NotABranch => {}
                }
            }
            writeln!(w, "}}\"]")?
        }
        Ok(())
    }

    // Emit the edges: one `pred:inst -> succ` line per CFG predecessor,
    // anchored at the branching instruction's record port.
    fn cfg_connections(&self, w: &mut Write) -> Result {
        for ebb in &self.func.layout {
            for (parent, inst) in self.cfg.pred_iter(ebb) {
                writeln!(w, "    {}:{} -> {}", parent, inst, ebb)?;
            }
        }
        Ok(())
    }
}
impl<'a> Display for CFGPrinter<'a> {
fn fmt(&self, f: &mut Formatter) -> Result {
self.write(f)
}
}

View File

@@ -18,7 +18,6 @@ pub trait Table<K: Copy + Eq> {
fn key(&self, idx: usize) -> Option<K>;
}
/// Look for `key` in `table`.
///
/// The provided `hash` value must have been computed from `key` using the same hash function that

View File

@@ -9,22 +9,24 @@
//! contexts concurrently. Typically, you would have one context per compilation thread and only a
//! single ISA instance.
use binemit::{CodeOffset, relax_branches, MemoryCodeSink, RelocSink};
use binemit::{relax_branches, CodeOffset, MemoryCodeSink, RelocSink, TrapSink};
use dce::do_dce;
use dominator_tree::DominatorTree;
use flowgraph::ControlFlowGraph;
use ir::Function;
use loop_analysis::LoopAnalysis;
use isa::TargetIsa;
use legalize_function;
use licm::do_licm;
use loop_analysis::LoopAnalysis;
use postopt::do_postopt;
use preopt::do_preopt;
use regalloc;
use result::{CtonError, CtonResult};
use settings::{FlagsOrIsa, OptLevel};
use simple_gvn::do_simple_gvn;
use timing;
use unreachable_code::eliminate_unreachable_code;
use verifier;
use simple_gvn::do_simple_gvn;
use licm::do_licm;
use preopt::do_preopt;
use timing;
/// Persistent data structures and compilation pipeline.
pub struct Context {
@@ -88,8 +90,13 @@ impl Context {
self.verify_if(isa)?;
self.compute_cfg();
self.preopt(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.preopt(isa)?;
}
self.legalize(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.postopt(isa)?;
}
if isa.flags().opt_level() == OptLevel::Best {
self.compute_domtree();
self.compute_loop_analysis();
@@ -98,6 +105,9 @@ impl Context {
}
self.compute_domtree();
self.eliminate_unreachable_code(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.dce(isa)?;
}
self.regalloc(isa)?;
self.prologue_epilogue(isa)?;
self.relax_branches(isa)
@@ -109,9 +119,15 @@ impl Context {
/// code is returned by `compile` above.
///
/// The machine code is not relocated. Instead, any relocations are emitted into `relocs`.
pub fn emit_to_memory(&self, mem: *mut u8, relocs: &mut RelocSink, isa: &TargetIsa) {
pub fn emit_to_memory(
&self,
mem: *mut u8,
relocs: &mut RelocSink,
traps: &mut TrapSink,
isa: &TargetIsa,
) {
let _tt = timing::binemit();
isa.emit_function(&self.func, &mut MemoryCodeSink::new(mem, relocs));
isa.emit_function(&self.func, &mut MemoryCodeSink::new(mem, relocs, traps));
}
/// Run the verifier on the function.
@@ -132,12 +148,12 @@ impl Context {
}
/// Run the locations verifier on the function.
pub fn verify_locations<'a>(&self, isa: &TargetIsa) -> verifier::Result {
pub fn verify_locations(&self, isa: &TargetIsa) -> verifier::Result {
verifier::verify_locations(isa, &self.func, None)
}
/// Run the locations verifier only if the `enable_verifier` setting is true.
pub fn verify_locations_if<'a>(&self, isa: &TargetIsa) -> CtonResult {
pub fn verify_locations_if(&self, isa: &TargetIsa) -> CtonResult {
if isa.flags().enable_verifier() {
self.verify_locations(isa).map_err(Into::into)
} else {
@@ -145,6 +161,13 @@ impl Context {
}
}
/// Perform dead-code elimination on the function.
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
do_dce(&mut self.func, &mut self.domtree);
self.verify_if(fisa)?;
Ok(())
}
/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
do_preopt(&mut self.func);
@@ -162,6 +185,13 @@ impl Context {
self.verify_if(isa)
}
/// Perform post-legalization rewrites on the function.
pub fn postopt(&mut self, isa: &TargetIsa) -> CtonResult {
do_postopt(&mut self.func, isa);
self.verify_if(isa)?;
Ok(())
}
/// Compute the control flow graph.
pub fn compute_cfg(&mut self) {
self.cfg.compute(&self.func)
@@ -189,7 +219,7 @@ impl Context {
/// Perform simple GVN on the function.
pub fn simple_gvn<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
do_simple_gvn(&mut self.func, &mut self.cfg, &mut self.domtree);
do_simple_gvn(&mut self.func, &mut self.domtree);
self.verify_if(fisa)
}

View File

@@ -637,7 +637,6 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> {
}
}
/// Encoding cursor.
///
/// An `EncCursor` can be used to insert instructions that are immediately assigned an encoding.
@@ -744,8 +743,9 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
if !self.srcloc.is_default() {
self.func.srclocs[inst] = self.srcloc;
}
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(match_wild_err_arm))]
match self.isa.encode(
&self.func.dfg,
&self.func.dfg[inst],

68
lib/cretonne/src/dce.rs Normal file
View File

@@ -0,0 +1,68 @@
//! A Dead-Code Elimination (DCE) pass.
//!
//! Dead code here means instructions that have no side effects and have no
//! result values used by other instructions.
use cursor::{Cursor, FuncCursor};
use dominator_tree::DominatorTree;
use entity::EntityRef;
use ir::instructions::InstructionData;
use ir::{DataFlowGraph, Function, Inst, Opcode};
use std::vec::Vec;
use timing;
/// Test whether the given opcode is unsafe to even consider for DCE.
///
/// Calls, branches, terminators, returns, traps, stores, and instructions
/// with other side effects must never be removed by DCE.
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
    opcode.is_call()
        || opcode.is_branch()
        || opcode.is_terminator()
        || opcode.is_return()
        || opcode.can_trap()
        || opcode.other_side_effects()
        || opcode.can_store()
}
/// Preserve instructions with used result values.
///
/// Returns true if any result value of `inst` is marked live in `live`
/// (indexed by `Value::index()`).
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
    for value in dfg.inst_results(inst) {
        if live[value.index()] {
            return true;
        }
    }
    false
}
/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't DCE them even if the
/// loaded value is unused.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
    // Non-loads never have defined trapping. Stack slots are always
    // accessible, a heap/memory `Load` traps unless flagged `notrap`, and any
    // other load format is conservatively assumed to trap.
    opcode.can_load() &&
        match *data {
            InstructionData::StackLoad { .. } => false,
            InstructionData::Load { flags, .. } => !flags.notrap(),
            _ => true,
        }
}
/// Perform DCE on `func`.
///
/// Removes instructions that have no side effects and whose results are all
/// unused. Requires a valid dominator tree for its post-order EBB traversal.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
    // Time this pass under the `dce` timing category.
    let _tt = timing::dce();
    debug_assert!(domtree.is_valid());

    // One liveness flag per SSA value, indexed by `Value::index()`.
    let mut live = Vec::with_capacity(func.dfg.num_values());
    live.resize(func.dfg.num_values(), false);

    // Walk EBBs in CFG post-order, scanning each EBB bottom-up. Since SSA
    // definitions dominate their uses, every use is visited before the
    // defining instruction, so a single sweep propagates liveness.
    for &ebb in domtree.cfg_postorder().iter() {
        let mut pos = FuncCursor::new(func).at_bottom(ebb);
        while let Some(inst) = pos.prev_inst() {
            {
                // Inner scope: `data` borrows `pos.func` immutably, and the
                // borrow must end before `pos.remove_inst()` below.
                let data = &pos.func.dfg[inst];
                let opcode = data.opcode();
                if trivially_unsafe_for_dce(opcode) ||
                    is_load_with_defined_trapping(opcode, &data) ||
                    any_inst_results_used(inst, &live, &pos.func.dfg)
                {
                    // The instruction must be kept, so all of its
                    // (alias-resolved) arguments become live.
                    for arg in pos.func.dfg.inst_args(inst) {
                        let v = pos.func.dfg.resolve_aliases(*arg);
                        live[v.index()] = true;
                    }
                    continue;
                }
            }
            // No side effects and no used results: the instruction is dead.
            pos.remove_inst();
        }
    }
}

View File

@@ -1,17 +1,15 @@
//! Compute "magic numbers" for division-by-constants transformations.
//!
//! Math helpers for division by (non-power-of-2) constants. This is based
//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
//! makes little difference, but the signed-vs-unsigned aspect has a large
//! effect. Therefore everything is presented in the order U32 U64 S32 S64
//! so as to emphasise the similarity of the U32 and U64 cases and the S32
//! and S64 cases.
#![allow(non_snake_case)]
//----------------------------------------------------------------------
//
// Math helpers for division by (non-power-of-2) constants. This is based
// on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
// are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
// makes little difference, but the signed-vs-unsigned aspect has a large
// effect. Therefore everything is presented in the order U32 U64 S32 S64
// so as to emphasise the similarity of the U32 and U64 cases and the S32
// and S64 cases.
// Structures to hold the "magic numbers" computed.
#[derive(PartialEq, Debug)]
@@ -222,8 +220,8 @@ pub fn magicS64(d: i64) -> MS64 {
#[cfg(test)]
mod tests {
use super::{magicU32, magicU64, magicS32, magicS64};
use super::{MU32, MU64, MS32, MS64};
use super::{MS32, MS64, MU32, MU64};
use super::{magicS32, magicS64, magicU32, magicU64};
fn mkMU32(mulBy: u32, doAdd: bool, shiftBy: i32) -> MU32 {
MU32 {

View File

@@ -1,38 +1,38 @@
//! A Dominator Tree represented as mappings of Ebbs to their immediate dominator.
use entity::EntityMap;
use flowgraph::{ControlFlowGraph, BasicBlock};
use ir::{Ebb, Inst, Value, Function, Layout, ProgramOrder, ExpandedProgramPoint};
use flowgraph::{BasicBlock, ControlFlowGraph};
use ir::instructions::BranchInfo;
use ir::{Ebb, ExpandedProgramPoint, Function, Inst, Layout, ProgramOrder, Value};
use packed_option::PackedOption;
use std::cmp;
use std::mem;
use timing;
use std::cmp::Ordering;
use std::mem;
use std::vec::Vec;
use timing;
// RPO numbers are not first assigned in a contiguous way but as multiples of STRIDE, to leave
// room for modifications of the dominator tree.
/// RPO numbers are not first assigned in a contiguous way but as multiples of STRIDE, to leave
/// room for modifications of the dominator tree.
const STRIDE: u32 = 4;
// Special RPO numbers used during `compute_postorder`.
/// Special RPO numbers used during `compute_postorder`.
const DONE: u32 = 1;
const SEEN: u32 = 2;
// Dominator tree node. We keep one of these per EBB.
/// Dominator tree node. We keep one of these per EBB.
#[derive(Clone, Default)]
struct DomNode {
// Number of this node in a reverse post-order traversal of the CFG, starting from 1.
// This number is monotonic in the reverse postorder but not contiguous, since we leave
// holes for later localized modifications of the dominator tree.
// Unreachable nodes get number 0, all others are positive.
/// Number of this node in a reverse post-order traversal of the CFG, starting from 1.
/// This number is monotonic in the reverse postorder but not contiguous, since we leave
/// holes for later localized modifications of the dominator tree.
/// Unreachable nodes get number 0, all others are positive.
rpo_number: u32,
// The immediate dominator of this EBB, represented as the branch or jump instruction at the
// end of the dominating basic block.
//
// This is `None` for unreachable blocks and the entry block which doesn't have an immediate
// dominator.
/// The immediate dominator of this EBB, represented as the branch or jump instruction at the
/// end of the dominating basic block.
///
/// This is `None` for unreachable blocks and the entry block which doesn't have an immediate
/// dominator.
idom: PackedOption<Inst>,
}
@@ -40,10 +40,10 @@ struct DomNode {
pub struct DominatorTree {
nodes: EntityMap<Ebb, DomNode>,
// CFG post-order of all reachable EBBs.
/// CFG post-order of all reachable EBBs.
postorder: Vec<Ebb>,
// Scratch memory used by `compute_postorder()`.
/// Scratch memory used by `compute_postorder()`.
stack: Vec<Ebb>,
valid: bool,
@@ -144,12 +144,12 @@ impl DominatorTree {
{
let (mut ebb_b, mut inst_b) = match b.into() {
ExpandedProgramPoint::Ebb(ebb) => (ebb, None),
ExpandedProgramPoint::Inst(inst) => {
(
layout.inst_ebb(inst).expect("Instruction not in layout."),
Some(inst),
)
}
ExpandedProgramPoint::Inst(inst) => (
layout.inst_ebb(inst).expect(
"Instruction not in layout.",
),
Some(inst),
),
};
let rpo_a = self.nodes[a].rpo_number;
@@ -460,7 +460,6 @@ impl DominatorTree {
rpo_number: new_ebb_rpo,
idom: Some(split_jump_inst).into(),
};
}
// Insert new_ebb just after ebb in the RPO. This function checks
@@ -667,12 +666,12 @@ impl DominatorTreePreorder {
#[cfg(test)]
mod test {
use super::*;
use cursor::{Cursor, FuncCursor};
use flowgraph::ControlFlowGraph;
use ir::types::*;
use ir::{Function, InstBuilder, TrapCode};
use settings;
use super::*;
use verifier::verify_context;
#[test]

View File

@@ -0,0 +1,109 @@
//! A double-ended iterator over entity references and entities.
use entity::EntityRef;
use std::marker::PhantomData;
use std::slice;
/// Double-ended iterator over the `(key, &value)` pairs of a map, visiting
/// keys in order.
pub struct Iter<'a, K: EntityRef, V>
where
    V: 'a,
{
    /// Key index of the next front element.
    pos: usize,
    /// Underlying iterator over the values.
    iter: slice::Iter<'a, V>,
    /// `K` only appears in the item type; `PhantomData` keeps the parameter.
    unused: PhantomData<K>,
}
impl<'a, K: EntityRef, V> Iter<'a, K, V> {
/// Create an `Iter` iterator that visits the `PrimaryMap` keys and values
/// of `iter`.
pub fn new(key: K, iter: slice::Iter<'a, V>) -> Self {
Self {
pos: key.index(),
iter,
unused: PhantomData,
}
}
}
impl<'a, K: EntityRef, V> Iterator for Iter<'a, K, V> {
    type Item = (K, &'a V);

    /// Yield the front `(key, &value)` pair and advance the front position.
    fn next(&mut self) -> Option<Self::Item> {
        let value = match self.iter.next() {
            Some(v) => v,
            None => return None,
        };
        let key = K::new(self.pos);
        self.pos += 1;
        Some((key, value))
    }

    /// Delegate to the underlying slice iterator, whose bounds are exact.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
impl<'a, K: EntityRef, V> DoubleEndedIterator for Iter<'a, K, V> {
    /// Yield the rearmost remaining `(key, &value)` pair.
    fn next_back(&mut self) -> Option<Self::Item> {
        // Bug fix: the back element's key is not `self.pos` (that indexes the
        // *front* element). After popping the back element, `self.iter.len()`
        // elements remain ahead of it, so its key is `pos + len`.
        if let Some(value) = self.iter.next_back() {
            Some((K::new(self.pos + self.iter.len()), value))
        } else {
            None
        }
    }
}
impl<'a, K: EntityRef, V> ExactSizeIterator for Iter<'a, K, V> {}
/// Double-ended iterator over the `(key, &mut value)` pairs of a map,
/// visiting keys in order.
pub struct IterMut<'a, K: EntityRef, V>
where
    V: 'a,
{
    /// Key index of the next front element.
    pos: usize,
    /// Underlying iterator over mutable references to the values.
    iter: slice::IterMut<'a, V>,
    /// `K` only appears in the item type; `PhantomData` keeps the parameter.
    unused: PhantomData<K>,
}
impl<'a, K: EntityRef, V> IterMut<'a, K, V> {
/// Create an `IterMut` iterator that visits the `PrimaryMap` keys and values
/// of `iter`.
pub fn new(key: K, iter: slice::IterMut<'a, V>) -> Self {
Self {
pos: key.index(),
iter,
unused: PhantomData,
}
}
}
impl<'a, K: EntityRef, V> Iterator for IterMut<'a, K, V> {
    type Item = (K, &'a mut V);

    /// Yield the front `(key, &mut value)` pair and advance the front position.
    fn next(&mut self) -> Option<Self::Item> {
        match self.iter.next() {
            Some(value) => {
                let key = K::new(self.pos);
                self.pos += 1;
                Some((key, value))
            }
            None => None,
        }
    }

    /// Delegate to the underlying slice iterator, whose bounds are exact.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
impl<'a, K: EntityRef, V> DoubleEndedIterator for IterMut<'a, K, V> {
    /// Yield the rearmost remaining `(key, &mut value)` pair.
    fn next_back(&mut self) -> Option<Self::Item> {
        // Bug fix: the back element's key is not `self.pos` (that indexes the
        // *front* element). After popping the back element, `self.iter.len()`
        // elements remain ahead of it, so its key is `pos + len`.
        if let Some(value) = self.iter.next_back() {
            Some((K::new(self.pos + self.iter.len()), value))
        } else {
            None
        }
    }
}
impl<'a, K: EntityRef, V> ExactSizeIterator for IterMut<'a, K, V> {}

View File

@@ -480,9 +480,9 @@ impl<T: EntityRef> EntityList<T> {
#[cfg(test)]
mod tests {
use super::*;
use super::{sclass_size, sclass_for_length};
use ir::Inst;
use super::{sclass_for_length, sclass_size};
use entity::EntityRef;
use ir::Inst;
#[test]
fn size_classes() {

View File

@@ -1,8 +1,9 @@
//! Densely numbered entity references as mapping keys.
use entity::{EntityRef, Keys};
use entity::{EntityRef, Iter, IterMut, Keys};
use std::marker::PhantomData;
use std::ops::{Index, IndexMut};
use std::slice;
use std::vec::Vec;
/// A mapping `K -> V` for densely indexed entity references.
@@ -68,11 +69,31 @@ where
self.elems.clear()
}
/// Iterate over all the keys and values in this map.
pub fn iter(&self) -> Iter<K, V> {
Iter::new(K::new(0), self.elems.iter())
}
/// Iterate over all the keys and values in this map, mutable edition.
pub fn iter_mut(&mut self) -> IterMut<K, V> {
IterMut::new(K::new(0), self.elems.iter_mut())
}
/// Iterate over all the keys in this map.
pub fn keys(&self) -> Keys<K> {
Keys::new(self.elems.len())
}
/// Iterate over all the keys in this map.
pub fn values(&self) -> slice::Iter<V> {
self.elems.iter()
}
/// Iterate over all the keys in this map, mutable edition.
pub fn values_mut(&mut self) -> slice::IterMut<V> {
self.elems.iter_mut()
}
/// Resize the map to have `n` entries by adding default entries as needed.
pub fn resize(&mut self, n: usize) {
self.elems.resize(n, self.default.clone());

View File

@@ -29,19 +29,21 @@
//! references allocated from an associated memory pool. It has a much smaller footprint than
//! `Vec`.
mod iter;
mod keys;
mod list;
mod map;
mod primary;
mod sparse;
mod set;
mod sparse;
pub use self::iter::{Iter, IterMut};
pub use self::keys::Keys;
pub use self::list::{EntityList, ListPool};
pub use self::map::EntityMap;
pub use self::primary::PrimaryMap;
pub use self::set::EntitySet;
pub use self::sparse::{SparseSet, SparseMap, SparseMapValue};
pub use self::sparse::{SparseMap, SparseMapValue, SparseSet};
/// A type wrapping a small integer index should implement `EntityRef` so it can be used as the key
/// of an `EntityMap` or `SparseMap`.
@@ -93,5 +95,5 @@ macro_rules! entity_impl {
(self as &::std::fmt::Display).fmt(f)
}
}
}
};
}

View File

@@ -1,7 +1,8 @@
//! Densely numbered entity references as mapping keys.
use entity::{EntityRef, Keys};
use entity::{EntityRef, Iter, IterMut, Keys};
use std::marker::PhantomData;
use std::ops::{Index, IndexMut};
use std::slice;
use std::vec::Vec;
/// A primary mapping `K -> V` allocating dense entity references.
@@ -59,6 +60,26 @@ where
Keys::new(self.elems.len())
}
/// Iterate over all the values in this map.
pub fn values(&self) -> slice::Iter<V> {
self.elems.iter()
}
/// Iterate over all the values in this map, mutable edition.
pub fn values_mut(&mut self) -> slice::IterMut<V> {
self.elems.iter_mut()
}
/// Iterate over all the keys and values in this map.
pub fn iter(&self) -> Iter<K, V> {
Iter::new(K::new(0), self.elems.iter())
}
/// Iterate over all the keys and values in this map, mutable edition.
pub fn iter_mut(&mut self) -> IterMut<K, V> {
IterMut::new(K::new(0), self.elems.iter_mut())
}
/// Remove all entries from this map.
pub fn clear(&mut self) {
self.elems.clear()
@@ -133,13 +154,80 @@ mod tests {
#[test]
fn push() {
let mut m = PrimaryMap::new();
let k1: E = m.push(12);
let k2 = m.push(33);
let k0: E = m.push(12);
let k1 = m.push(33);
assert_eq!(m[k1], 12);
assert_eq!(m[k2], 33);
assert_eq!(m[k0], 12);
assert_eq!(m[k1], 33);
let v: Vec<E> = m.keys().collect();
assert_eq!(v, [k1, k2]);
assert_eq!(v, [k0, k1]);
}
#[test]
fn iter() {
let mut m: PrimaryMap<E, usize> = PrimaryMap::new();
m.push(12);
m.push(33);
let mut i = 0;
for (key, value) in m.iter() {
assert_eq!(key.index(), i);
match i {
0 => assert_eq!(*value, 12),
1 => assert_eq!(*value, 33),
_ => panic!(),
}
i += 1;
}
i = 0;
for (key_mut, value_mut) in m.iter_mut() {
assert_eq!(key_mut.index(), i);
match i {
0 => assert_eq!(*value_mut, 12),
1 => assert_eq!(*value_mut, 33),
_ => panic!(),
}
i += 1;
}
}
#[test]
fn keys() {
let mut m: PrimaryMap<E, usize> = PrimaryMap::new();
m.push(12);
m.push(33);
let mut i = 0;
for key in m.keys() {
assert_eq!(key.index(), i);
i += 1;
}
}
#[test]
fn values() {
let mut m: PrimaryMap<E, usize> = PrimaryMap::new();
m.push(12);
m.push(33);
let mut i = 0;
for value in m.values() {
match i {
0 => assert_eq!(*value, 12),
1 => assert_eq!(*value, 33),
_ => panic!(),
}
i += 1;
}
i = 0;
for value_mut in m.values_mut() {
match i {
0 => assert_eq!(*value_mut, 12),
1 => assert_eq!(*value_mut, 33),
_ => panic!(),
}
i += 1;
}
}
}

View File

@@ -7,7 +7,7 @@
//! > Briggs, Torczon, *An efficient representation for sparse sets*,
//! ACM Letters on Programming Languages and Systems, Volume 2, Issue 1-4, March-Dec. 1993.
use entity::{EntityRef, EntityMap};
use entity::{EntityMap, EntityRef};
use std::mem;
use std::slice;
use std::u32;

View File

@@ -24,9 +24,9 @@
//! and `(Ebb0, jmp Ebb2)` respectively.
use bforest;
use ir::{Function, Inst, Ebb};
use ir::instructions::BranchInfo;
use entity::EntityMap;
use ir::instructions::BranchInfo;
use ir::{Ebb, Function, Inst};
use std::mem;
use timing;
@@ -203,7 +203,7 @@ pub type SuccIter<'a> = bforest::SetIter<'a, Ebb, ()>;
mod tests {
use super::*;
use cursor::{Cursor, FuncCursor};
use ir::{Function, InstBuilder, types};
use ir::{types, Function, InstBuilder};
use std::vec::Vec;
#[test]

View File

@@ -5,8 +5,8 @@
use ir;
use ir::types;
use ir::{InstructionData, DataFlowGraph};
use ir::{Opcode, Type, Inst, Value};
use ir::{DataFlowGraph, InstructionData};
use ir::{Inst, Opcode, Type, Value};
use isa;
/// Base trait for instruction builders.
@@ -36,7 +36,7 @@ pub trait InstBuilderBase<'f>: Sized {
//
// This file defines the `InstBuilder` trait as an extension of `InstBuilderBase` with methods per
// instruction format and per opcode.
include!(concat!(env!("OUT_DIR"), "/builder.rs"));
include!(concat!(env!("OUT_DIR"), "/inst_builder.rs"));
/// Any type implementing `InstBuilderBase` gets all the `InstBuilder` methods for free.
impl<'f, T: InstBuilderBase<'f>> InstBuilder<'f> for T {}
@@ -145,8 +145,9 @@ where
}
impl<'f, IIB, Array> InstBuilderBase<'f> for InsertReuseBuilder<'f, IIB, Array>
where IIB: InstInserterBase<'f>,
Array: AsRef<[Option<Value>]>
where
IIB: InstInserterBase<'f>,
Array: AsRef<[Option<Value>]>,
{
fn data_flow_graph(&self) -> &DataFlowGraph {
self.inserter.data_flow_graph()
@@ -215,9 +216,9 @@ impl<'f> InstBuilderBase<'f> for ReplaceBuilder<'f> {
#[cfg(test)]
mod tests {
use cursor::{Cursor, FuncCursor};
use ir::{Function, InstBuilder, ValueDef};
use ir::types::*;
use ir::condcodes::*;
use ir::types::*;
use ir::{Function, InstBuilder, ValueDef};
#[test]
fn types() {

View File

@@ -13,12 +13,14 @@ pub trait CondCode: Copy {
///
/// The inverse condition code produces the opposite result for all comparisons.
/// That is, `cmp CC, x, y` is true if and only if `cmp CC.inverse(), x, y` is false.
#[must_use]
fn inverse(self) -> Self;
/// Get the reversed condition code for `self`.
///
/// The reversed condition code produces the same result as swapping `x` and `y` in the
/// comparison. That is, `cmp CC, x, y` is the same as `cmp CC.reverse(), y, x`.
#[must_use]
fn reverse(self) -> Self;
}

View File

@@ -1,20 +1,20 @@
//! Data flow graph tracking Instructions, Values, and EBBs.
use entity::{PrimaryMap, EntityMap};
use isa::TargetIsa;
use entity::{EntityMap, PrimaryMap};
use ir;
use ir::builder::ReplaceBuilder;
use ir::extfunc::ExtFuncData;
use ir::instructions::{InstructionData, CallInfo, BranchInfo};
use ir::instructions::{BranchInfo, CallInfo, InstructionData};
use ir::types;
use ir::{Ebb, Inst, Value, Type, SigRef, Signature, FuncRef, ValueList, ValueListPool};
use ir::{Ebb, FuncRef, Inst, SigRef, Signature, Type, Value, ValueList, ValueListPool};
use isa::{Encoding, Legalize, TargetIsa};
use packed_option::ReservedValue;
use write::write_operands;
use std::fmt;
use std::iter;
use std::mem;
use std::ops::{Index, IndexMut};
use std::u16;
use write::write_operands;
/// A data flow graph defines all instructions and extended basic blocks in a function as well as
/// the data flow dependencies between them. The DFG also tracks values which can be either
@@ -121,8 +121,9 @@ impl DataFlowGraph {
/// Resolve value aliases.
///
/// Find the original SSA value that `value` aliases.
fn resolve_aliases(values: &PrimaryMap<Value, ValueData>, value: Value) -> Value {
/// Find the original SSA value that `value` aliases, or None if an
/// alias cycle is detected.
fn maybe_resolve_aliases(values: &PrimaryMap<Value, ValueData>, value: Value) -> Option<Value> {
let mut v = value;
// Note that values may be empty here.
@@ -130,10 +131,22 @@ fn resolve_aliases(values: &PrimaryMap<Value, ValueData>, value: Value) -> Value
if let ValueData::Alias { original, .. } = values[v] {
v = original;
} else {
return v;
return Some(v);
}
}
panic!("Value alias loop detected for {}", value);
None
}
/// Resolve value aliases.
///
/// Find the original SSA value that `value` aliases.
fn resolve_aliases(values: &PrimaryMap<Value, ValueData>, value: Value) -> Value {
if let Some(v) = maybe_resolve_aliases(values, value) {
v
} else {
panic!("Value alias loop detected for {}", value);
}
}
/// Handling values.
@@ -238,6 +251,7 @@ impl DataFlowGraph {
self.value_type(dest),
ty
);
debug_assert_ne!(ty, types::VOID);
self.values[dest] = ValueData::Alias { ty, original };
}
@@ -282,6 +296,7 @@ impl DataFlowGraph {
self.value_type(dest),
ty
);
debug_assert_ne!(ty, types::VOID);
self.values[dest] = ValueData::Alias { ty, original };
}
@@ -333,18 +348,18 @@ impl ValueDef {
}
}
// Internal table storage for extended values.
/// Internal table storage for extended values.
#[derive(Clone, Debug)]
enum ValueData {
// Value is defined by an instruction.
/// Value is defined by an instruction.
Inst { ty: Type, num: u16, inst: Inst },
// Value is an EBB parameter.
/// Value is an EBB parameter.
Param { ty: Type, num: u16, ebb: Ebb },
// Value is an alias of another value.
// An alias value can't be linked as an instruction result or EBB parameter. It is used as a
// placeholder when the original instruction or EBB has been rewritten or modified.
/// Value is an alias of another value.
/// An alias value can't be linked as an instruction result or EBB parameter. It is used as a
/// placeholder when the original instruction or EBB has been rewritten or modified.
Alias { ty: Type, original: Value },
}
@@ -645,6 +660,12 @@ impl DataFlowGraph {
self.value_type(self.first_result(inst))
}
}
/// Wrapper around `TargetIsa::encode` for encoding an existing instruction
/// in the `DataFlowGraph`.
pub fn encode(&self, inst: Inst, isa: &TargetIsa) -> Result<Encoding, Legalize> {
isa.encode(&self, &self[inst], self.ctrl_typevar(inst))
}
}
/// Allow immutable access to instructions via indexing.
@@ -754,7 +775,6 @@ impl DataFlowGraph {
}
}
/// Append an existing value to `ebb`'s parameters.
///
/// The appended value can't already be attached to something else.
@@ -808,14 +828,14 @@ impl DataFlowGraph {
}
}
// Contents of an extended basic block.
//
// Parameters on an extended basic block are values that dominate everything in the EBB. All
// branches to this EBB must provide matching arguments, and the arguments to the entry EBB must
// match the function arguments.
/// Contents of an extended basic block.
///
/// Parameters on an extended basic block are values that dominate everything in the EBB. All
/// branches to this EBB must provide matching arguments, and the arguments to the entry EBB must
/// match the function arguments.
#[derive(Clone)]
struct EbbData {
// List of parameters to this EBB.
/// List of parameters to this EBB.
params: ValueList,
}
@@ -842,7 +862,6 @@ impl<'a> fmt::Display for DisplayInst<'a> {
write!(f, " = ")?;
}
let typevar = dfg.ctrl_typevar(inst);
if typevar.is_void() {
write!(f, "{}", dfg[inst].opcode())?;
@@ -859,8 +878,9 @@ impl DataFlowGraph {
/// to create invalid values for index padding which may be reassigned later.
#[cold]
fn set_value_type_for_parser(&mut self, v: Value, t: Type) {
assert!(
self.value_type(v) == types::VOID,
assert_eq!(
self.value_type(v),
types::VOID,
"this function is only for assigning types to previously invalid values"
);
match self.values[v] {
@@ -920,12 +940,38 @@ impl DataFlowGraph {
/// aliases with specific values.
#[cold]
pub fn make_value_alias_for_parser(&mut self, src: Value, dest: Value) {
let ty = self.value_type(src);
assert_ne!(src, Value::reserved_value());
assert_ne!(dest, Value::reserved_value());
let ty = if self.values.is_valid(src) {
self.value_type(src)
} else {
// As a special case, if we can't resolve the aliasee yet, use VOID
// temporarily. It will be resolved later in parsing.
types::VOID
};
let data = ValueData::Alias { ty, original: src };
self.values[dest] = data;
}
/// Compute the type of an alias. This is only for use in the parser.
/// Returns false if an alias cycle was encountered.
#[cold]
pub fn set_alias_type_for_parser(&mut self, v: Value) -> bool {
if let Some(resolved) = maybe_resolve_aliases(&self.values, v) {
let old_ty = self.value_type(v);
let new_ty = self.value_type(resolved);
if old_ty == types::VOID {
self.set_value_type_for_parser(v, new_ty);
} else {
assert_eq!(old_ty, new_ty);
}
true
} else {
false
}
}
/// Create an invalid value, to pad the index space. This is only for use by
/// the parser to pad out the value index space.
#[cold]
@@ -936,6 +982,20 @@ impl DataFlowGraph {
};
self.make_value(data);
}
/// Check if a value reference is valid, while being aware of aliases which
/// may be unresolved while parsing.
#[cold]
pub fn value_is_valid_for_parser(&self, v: Value) -> bool {
if !self.value_is_valid(v) {
return false;
}
if let ValueData::Alias { ty, .. } = self.values[v] {
ty != types::VOID
} else {
true
}
}
}
#[cfg(test)]
@@ -943,7 +1003,7 @@ mod tests {
use super::*;
use cursor::{Cursor, FuncCursor};
use ir::types;
use ir::{Function, Opcode, InstructionData, TrapCode};
use ir::{Function, InstructionData, Opcode, TrapCode};
use std::string::ToString;
#[test]

View File

@@ -1,6 +1,6 @@
//! IL entity references.
//! Cretonne IR entity references.
//!
//! Instructions in Cretonne IL need to reference other entities in the function. This can be other
//! Instructions in Cretonne IR need to reference other entities in the function. This can be other
//! parts of the function like extended basic blocks or stack slots, or it can be external entities
//! that are declared in the function preamble in the text format.
//!
@@ -16,7 +16,7 @@
//! data structures use the `PackedOption<EntityRef>` representation, while function arguments and
//! return values prefer the more Rust-like `Option<EntityRef>` variant.
//!
//! The entity references all implement the `Display` trait in a way that matches the textual IL
//! The entity references all implement the `Display` trait in a way that matches the textual IR
//! format.
use std::fmt;
@@ -261,8 +261,8 @@ impl From<Heap> for AnyEntity {
#[cfg(test)]
mod tests {
use super::*;
use std::u32;
use std::string::ToString;
use std::u32;
#[test]
fn value_with_number() {
@@ -275,8 +275,8 @@ mod tests {
#[test]
fn memory() {
use std::mem;
use packed_option::PackedOption;
use std::mem;
// This is the whole point of `PackedOption`.
assert_eq!(
mem::size_of::<Value>(),

View File

@@ -5,7 +5,7 @@
//!
//! This module declares the data types used to represent external functions and call signatures.
use ir::{Type, ExternalName, SigRef, ArgumentLoc};
use ir::{ArgumentLoc, ExternalName, SigRef, Type};
use isa::{RegInfo, RegUnit};
use std::cmp;
use std::fmt;
@@ -343,10 +343,11 @@ impl fmt::Display for ExtFuncData {
/// determined by a `(TargetIsa, CallConv)` tuple.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CallConv {
/// The C calling convention.
/// The System V-style calling convention.
///
/// This is the native calling convention that a C compiler would use on the platform.
Native,
/// This is the System V-style calling convention that a C compiler would
/// use on many platforms.
SystemV,
/// A JIT-compiled WebAssembly function in the SpiderMonkey VM.
SpiderWASM,
@@ -356,7 +357,7 @@ impl fmt::Display for CallConv {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::CallConv::*;
f.write_str(match *self {
Native => "native",
SystemV => "system_v",
SpiderWASM => "spiderwasm",
})
}
@@ -368,7 +369,7 @@ impl FromStr for CallConv {
fn from_str(s: &str) -> Result<Self, Self::Err> {
use self::CallConv::*;
match s {
"native" => Ok(Native),
"system_v" => Ok(SystemV),
"spiderwasm" => Ok(SpiderWASM),
_ => Err(()),
}
@@ -378,7 +379,7 @@ impl FromStr for CallConv {
#[cfg(test)]
mod tests {
use super::*;
use ir::types::{I32, F32, B8};
use ir::types::{B8, F32, I32};
use std::string::ToString;
#[test]
@@ -410,7 +411,7 @@ mod tests {
#[test]
fn call_conv() {
for &cc in &[CallConv::Native, CallConv::SpiderWASM] {
for &cc in &[CallConv::SystemV, CallConv::SpiderWASM] {
assert_eq!(Ok(cc), cc.to_string().parse())
}
}

View File

@@ -16,7 +16,7 @@ const TESTCASE_NAME_LENGTH: usize = 16;
/// to keep track of a sy mbol table.
///
/// External names are primarily used as keys by code using Cretonne to map
/// from a cretonne::ir::FuncRef or similar to additional associated data.
/// from a `cretonne::ir::FuncRef` or similar to additional associated data.
///
/// External names can also serve as a primitive testing and debugging tool.
/// In particular, many `.cton` test files use function names to identify

View File

@@ -4,13 +4,13 @@
//! instructions.
use binemit::CodeOffset;
use entity::{PrimaryMap, EntityMap};
use entity::{EntityMap, PrimaryMap};
use ir;
use ir::{ExternalName, CallConv, Signature, DataFlowGraph, Layout};
use ir::{InstEncodings, ValueLocations, JumpTables, StackSlots, EbbOffsets, SourceLocs};
use ir::{Ebb, JumpTableData, JumpTable, StackSlotData, StackSlot, SigRef, ExtFuncData, FuncRef,
GlobalVarData, GlobalVar, HeapData, Heap};
use isa::{TargetIsa, EncInfo};
use ir::{CallConv, DataFlowGraph, ExternalName, Layout, Signature};
use ir::{Ebb, ExtFuncData, FuncRef, GlobalVar, GlobalVarData, Heap, HeapData, JumpTable,
JumpTableData, SigRef, StackSlot, StackSlotData};
use ir::{EbbOffsets, InstEncodings, JumpTables, SourceLocs, StackSlots, ValueLocations};
use isa::{EncInfo, Legalize, TargetIsa};
use std::fmt;
use write::write_function;
@@ -55,7 +55,7 @@ pub struct Function {
///
/// This information is only transiently available after the `binemit::relax_branches` function
/// computes it, and it can easily be recomputed by calling that function. It is not included
/// in the textual IL format.
/// in the textual IR format.
pub offsets: EbbOffsets,
/// Source locations.
@@ -86,7 +86,7 @@ impl Function {
/// Clear all data structures in this function.
pub fn clear(&mut self) {
self.signature.clear(ir::CallConv::Native);
self.signature.clear(ir::CallConv::SystemV);
self.stack_slots.clear();
self.global_vars.clear();
self.heaps.clear();
@@ -99,9 +99,9 @@ impl Function {
self.srclocs.clear();
}
/// Create a new empty, anonymous function with a native calling convention.
/// Create a new empty, anonymous function with a SystemV calling convention.
pub fn new() -> Self {
Self::with_name_signature(ExternalName::default(), Signature::new(CallConv::Native))
Self::with_name_signature(ExternalName::default(), Signature::new(CallConv::SystemV))
}
/// Creates a jump table in the function, to be used by `br_table` instructions.
@@ -176,6 +176,13 @@ impl Function {
iter: self.layout.ebb_insts(ebb),
}
}
/// Wrapper around `DataFlowGraph::encode` which assigns `inst` the resulting encoding.
pub fn update_encoding(&mut self, inst: ir::Inst, isa: &TargetIsa) -> Result<(), Legalize> {
self.dfg.encode(inst, isa).map(
|e| { self.encodings[inst] = e; },
)
}
}
/// Wrapper type capable of displaying a `Function` with correct ISA annotations.

View File

@@ -1,7 +1,7 @@
//! Global variables.
use ir::{ExternalName, GlobalVar};
use ir::immediates::Offset32;
use ir::{ExternalName, GlobalVar};
use std::fmt;
/// Information about a global variable declaration.
@@ -17,7 +17,8 @@ pub enum GlobalVarData {
/// Variable is part of a struct pointed to by another global variable.
///
/// The `base` global variable is assumed to contain a pointer to a struct. This global
/// variable lives at an offset into the struct.
/// variable lives at an offset into the struct. The memory must be accessible, and
/// naturally aligned to hold a pointer value.
Deref {
/// The base pointer global variable.
base: GlobalVar,

View File

@@ -1,7 +1,7 @@
//! Heaps.
use ir::immediates::Imm64;
use ir::GlobalVar;
use ir::immediates::Imm64;
use std::fmt;
/// Information about a heap declaration.
@@ -25,9 +25,12 @@ pub struct HeapData {
#[derive(Clone)]
pub enum HeapBase {
/// The heap base lives in a reserved register.
///
/// This feature is not yet implemented.
ReservedReg,
/// The heap base is in a global variable.
/// The heap base is in a global variable. The variable must be accessible and naturally
/// aligned for a pointer.
GlobalVar(GlobalVar),
}
@@ -36,7 +39,8 @@ pub enum HeapBase {
pub enum HeapStyle {
/// A dynamic heap can be relocated to a different base address when it is grown.
Dynamic {
/// Global variable holding the current bound of the heap in bytes.
/// Global variable holding the current bound of the heap in bytes. It is
/// required to be accessible and naturally aligned for a pointer-sized integer.
bound_gv: GlobalVar,
},

View File

@@ -21,6 +21,11 @@ impl Imm64 {
pub fn new(x: i64) -> Imm64 {
Imm64(x)
}
/// Return self negated.
pub fn wrapping_neg(self) -> Imm64 {
Imm64(self.0.wrapping_neg())
}
}
impl Into<i64> for Imm64 {
@@ -35,12 +40,12 @@ impl From<i64> for Imm64 {
}
}
// Hexadecimal with a multiple of 4 digits and group separators:
//
// 0xfff0
// 0x0001_ffff
// 0xffff_ffff_fff8_4400
//
/// Hexadecimal with a multiple of 4 digits and group separators:
///
/// 0xfff0
/// 0x0001_ffff
/// 0xffff_ffff_fff8_4400
///
fn write_hex(x: i64, f: &mut Formatter) -> fmt::Result {
let mut pos = (64 - x.leading_zeros() - 1) & 0xf0;
write!(f, "0x{:04x}", (x >> pos) & 0xffff)?;
@@ -179,7 +184,6 @@ impl Display for Uimm32 {
} else {
write_hex(i64::from(self.0), f)
}
}
}
@@ -244,7 +248,6 @@ impl Display for Offset32 {
} else {
write_hex(val, f)
}
}
}
@@ -280,16 +283,16 @@ pub struct Ieee32(u32);
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct Ieee64(u64);
// Format a floating point number in a way that is reasonably human-readable, and that can be
// converted back to binary without any rounding issues. The hexadecimal formatting of normal and
// subnormal numbers is compatible with C99 and the `printf "%a"` format specifier. The NaN and Inf
// formats are not supported by C99.
//
// The encoding parameters are:
//
// w - exponent field width in bits
// t - trailing significand field width in bits
//
/// Format a floating point number in a way that is reasonably human-readable, and that can be
/// converted back to binary without any rounding issues. The hexadecimal formatting of normal and
/// subnormal numbers is compatible with C99 and the `printf "%a"` format specifier. The NaN and Inf
/// formats are not supported by C99.
///
/// The encoding parameters are:
///
/// w - exponent field width in bits
/// t - trailing significand field width in bits
///
fn format_float(bits: u64, w: u8, t: u8, f: &mut Formatter) -> fmt::Result {
debug_assert!(w > 0 && w <= 16, "Invalid exponent range");
debug_assert!(1 + w + t <= 64, "Too large IEEE format for u64");
@@ -358,13 +361,13 @@ fn format_float(bits: u64, w: u8, t: u8, f: &mut Formatter) -> fmt::Result {
}
}
// Parse a float using the same format as `format_float` above.
//
// The encoding parameters are:
//
// w - exponent field width in bits
// t - trailing significand field width in bits
//
/// Parse a float using the same format as `format_float` above.
///
/// The encoding parameters are:
///
/// w - exponent field width in bits
/// t - trailing significand field width in bits
///
fn parse_float(s: &str, w: u8, t: u8) -> Result<u64, &'static str> {
debug_assert!(w > 0 && w <= 16, "Invalid exponent range");
debug_assert!(1 + w + t <= 64, "Too large IEEE format for u64");
@@ -456,7 +459,6 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result<u64, &'static str> {
None => return Err("Invalid character"),
}
}
}
}
@@ -649,10 +651,10 @@ impl FromStr for Ieee64 {
#[cfg(test)]
mod tests {
use super::*;
use std::{f32, f64};
use std::str::FromStr;
use std::fmt::Display;
use std::str::FromStr;
use std::string::ToString;
use std::{f32, f64};
#[test]
fn format_imm64() {

View File

@@ -1,25 +1,23 @@
//! Instruction formats and opcodes.
//!
//! The `instructions` module contains definitions for instruction formats, opcodes, and the
//! in-memory representation of IL instructions.
//! in-memory representation of IR instructions.
//!
//! A large part of this module is auto-generated from the instruction descriptions in the meta
//! directory.
use std::fmt::{self, Display, Formatter};
use std::str::FromStr;
use std::ops::{Deref, DerefMut};
use std::str::FromStr;
use std::vec::Vec;
use ir;
use ir::{Value, Type, Ebb, JumpTable, SigRef, FuncRef, StackSlot, MemFlags};
use ir::immediates::{Imm64, Uimm8, Uimm32, Ieee32, Ieee64, Offset32};
use ir::condcodes::*;
use ir::types;
use isa::RegUnit;
use ir::{Ebb, FuncRef, JumpTable, SigRef, Type, Value};
use isa;
use entity;
use bitset::BitSet;
use entity;
use ref_slice::{ref_slice, ref_slice_mut};
/// Some instructions use an external list of argument values because there is not enough space in
@@ -33,6 +31,7 @@ pub type ValueListPool = entity::ListPool<Value>;
// Include code generated by `lib/cretonne/meta/gen_instr.py`. This file contains:
//
// - The `pub enum InstructionFormat` enum with all the instruction formats.
// - The `pub enum InstructionData` enum with all the instruction data fields.
// - The `pub enum Opcode` definition with all known opcodes,
// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table.
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
@@ -74,7 +73,7 @@ impl FromStr for Opcode {
/// Parse an Opcode name from a string.
fn from_str(s: &str) -> Result<Opcode, &'static str> {
use constant_hash::{Table, simple_hash, probe};
use constant_hash::{probe, simple_hash, Table};
impl<'a> Table<&'a str> for [Option<Opcode>] {
fn len(&self) -> usize {
@@ -95,190 +94,6 @@ impl FromStr for Opcode {
}
}
/// Contents on an instruction.
///
/// Every variant must contain `opcode` and `ty` fields. An instruction that doesn't produce a
/// value should have its `ty` field set to `VOID`. The size of `InstructionData` should be kept at
/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
/// `Box<AuxData>` to store the additional information out of line.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum InstructionData {
Unary { opcode: Opcode, arg: Value },
UnaryImm { opcode: Opcode, imm: Imm64 },
UnaryIeee32 { opcode: Opcode, imm: Ieee32 },
UnaryIeee64 { opcode: Opcode, imm: Ieee64 },
UnaryBool { opcode: Opcode, imm: bool },
UnaryGlobalVar {
opcode: Opcode,
global_var: ir::GlobalVar,
},
Binary { opcode: Opcode, args: [Value; 2] },
BinaryImm {
opcode: Opcode,
arg: Value,
imm: Imm64,
},
Ternary { opcode: Opcode, args: [Value; 3] },
MultiAry { opcode: Opcode, args: ValueList },
NullAry { opcode: Opcode },
InsertLane {
opcode: Opcode,
lane: Uimm8,
args: [Value; 2],
},
ExtractLane {
opcode: Opcode,
lane: Uimm8,
arg: Value,
},
IntCompare {
opcode: Opcode,
cond: IntCC,
args: [Value; 2],
},
IntCompareImm {
opcode: Opcode,
cond: IntCC,
arg: Value,
imm: Imm64,
},
IntCond {
opcode: Opcode,
cond: IntCC,
arg: Value,
},
FloatCompare {
opcode: Opcode,
cond: FloatCC,
args: [Value; 2],
},
FloatCond {
opcode: Opcode,
cond: FloatCC,
arg: Value,
},
IntSelect {
opcode: Opcode,
cond: IntCC,
args: [Value; 3],
},
Jump {
opcode: Opcode,
destination: Ebb,
args: ValueList,
},
Branch {
opcode: Opcode,
destination: Ebb,
args: ValueList,
},
BranchIcmp {
opcode: Opcode,
cond: IntCC,
destination: Ebb,
args: ValueList,
},
BranchInt {
opcode: Opcode,
cond: IntCC,
destination: Ebb,
args: ValueList,
},
BranchFloat {
opcode: Opcode,
cond: FloatCC,
destination: Ebb,
args: ValueList,
},
BranchTable {
opcode: Opcode,
arg: Value,
table: JumpTable,
},
Call {
opcode: Opcode,
func_ref: FuncRef,
args: ValueList,
},
IndirectCall {
opcode: Opcode,
sig_ref: SigRef,
args: ValueList,
},
FuncAddr { opcode: Opcode, func_ref: FuncRef },
StackLoad {
opcode: Opcode,
stack_slot: StackSlot,
offset: Offset32,
},
StackStore {
opcode: Opcode,
arg: Value,
stack_slot: StackSlot,
offset: Offset32,
},
HeapAddr {
opcode: Opcode,
heap: ir::Heap,
arg: Value,
imm: Uimm32,
},
Load {
opcode: Opcode,
flags: MemFlags,
arg: Value,
offset: Offset32,
},
Store {
opcode: Opcode,
flags: MemFlags,
args: [Value; 2],
offset: Offset32,
},
RegMove {
opcode: Opcode,
arg: Value,
src: RegUnit,
dst: RegUnit,
},
CopySpecial {
opcode: Opcode,
src: RegUnit,
dst: RegUnit,
},
RegSpill {
opcode: Opcode,
arg: Value,
src: RegUnit,
dst: StackSlot,
},
RegFill {
opcode: Opcode,
arg: Value,
src: StackSlot,
dst: RegUnit,
},
Trap { opcode: Opcode, code: ir::TrapCode },
CondTrap {
opcode: Opcode,
arg: Value,
code: ir::TrapCode,
},
IntCondTrap {
opcode: Opcode,
cond: IntCC,
arg: Value,
code: ir::TrapCode,
},
FloatCondTrap {
opcode: Opcode,
cond: FloatCC,
arg: Value,
code: ir::TrapCode,
},
}
/// A variable list of `Value` operands used for function call arguments and passing arguments to
/// basic blocks.
#[derive(Clone, Debug)]
@@ -697,16 +512,12 @@ impl OperandConstraint {
LaneOf => Bound(ctrl_type.lane_type()),
AsBool => Bound(ctrl_type.as_bool()),
HalfWidth => Bound(ctrl_type.half_width().expect("invalid type for half_width")),
DoubleWidth => {
Bound(ctrl_type.double_width().expect(
"invalid type for double_width",
))
}
HalfVector => {
Bound(ctrl_type.half_vector().expect(
"invalid type for half_vector",
))
}
DoubleWidth => Bound(ctrl_type.double_width().expect(
"invalid type for double_width",
)),
HalfVector => Bound(ctrl_type.half_vector().expect(
"invalid type for half_vector",
)),
DoubleVector => Bound(ctrl_type.by(2).expect("invalid type for double_vector")),
}
}

View File

@@ -3,11 +3,11 @@
//! Jump tables are declared in the preamble and assigned an `ir::entities::JumpTable` reference.
//! The actual table of destinations is stored in a `JumpTableData` struct defined in this module.
use packed_option::PackedOption;
use ir::entities::Ebb;
use packed_option::PackedOption;
use std::fmt::{self, Display, Formatter};
use std::iter;
use std::slice;
use std::fmt::{self, Display, Formatter};
use std::vec::Vec;
/// Contents of a jump table.
@@ -140,10 +140,10 @@ impl Display for JumpTableData {
#[cfg(test)]
mod tests {
use super::JumpTableData;
use ir::Ebb;
use entity::EntityRef;
use std::vec::Vec;
use ir::Ebb;
use std::string::ToString;
use std::vec::Vec;
#[test]
fn empty() {

View File

@@ -4,11 +4,11 @@
//! determined by the `Layout` data structure defined in this module.
use entity::EntityMap;
use ir::progpoint::{ExpandedProgramPoint, ProgramOrder};
use ir::{Ebb, Inst};
use ir::progpoint::{ProgramOrder, ExpandedProgramPoint};
use packed_option::PackedOption;
use std::cmp;
use std::iter::{Iterator, IntoIterator};
use std::iter::{IntoIterator, Iterator};
use timing;
/// The `Layout` struct determines the layout of EBBs and instructions in a function. It does not
@@ -26,18 +26,18 @@ use timing;
///
#[derive(Clone)]
pub struct Layout {
// Linked list nodes for the layout order of EBBs Forms a doubly linked list, terminated in
// both ends by `None`.
/// Linked list nodes for the layout order of EBBs Forms a doubly linked list, terminated in
/// both ends by `None`.
ebbs: EntityMap<Ebb, EbbNode>,
// Linked list nodes for the layout order of instructions. Forms a double linked list per EBB,
// terminated in both ends by `None`.
/// Linked list nodes for the layout order of instructions. Forms a double linked list per EBB,
/// terminated in both ends by `None`.
insts: EntityMap<Inst, InstNode>,
// First EBB in the layout order, or `None` when no EBBs have been laid out.
/// First EBB in the layout order, or `None` when no EBBs have been laid out.
first_ebb: Option<Ebb>,
// Last EBB in the layout order, or `None` when no EBBs have been laid out.
/// Last EBB in the layout order, or `None` when no EBBs have been laid out.
last_ebb: Option<Ebb>,
}
@@ -61,32 +61,31 @@ impl Layout {
}
}
// Sequence numbers.
//
// All instructions and EBBs are given a sequence number that can be used to quickly determine
// their relative position in the layout. The sequence numbers are not contiguous, but are assigned
// like line numbers in BASIC: 10, 20, 30, ...
//
// The EBB sequence numbers are strictly increasing, and so are the instruction sequence numbers
// within an EBB. The instruction sequence numbers are all between the sequence number of their
// containing EBB and the following EBB.
//
// The result is that sequence numbers work like BASIC line numbers for the textual representation
// of the IL.
/// Sequence numbers.
///
/// All instructions and EBBs are given a sequence number that can be used to quickly determine
/// their relative position in the layout. The sequence numbers are not contiguous, but are assigned
/// like line numbers in BASIC: 10, 20, 30, ...
///
/// The EBB sequence numbers are strictly increasing, and so are the instruction sequence numbers
/// within an EBB. The instruction sequence numbers are all between the sequence number of their
/// containing EBB and the following EBB.
///
/// The result is that sequence numbers work like BASIC line numbers for the textual form of the IR.
type SequenceNumber = u32;
// Initial stride assigned to new sequence numbers.
/// Initial stride assigned to new sequence numbers.
const MAJOR_STRIDE: SequenceNumber = 10;
// Secondary stride used when renumbering locally.
/// Secondary stride used when renumbering locally.
const MINOR_STRIDE: SequenceNumber = 2;
// Limit on the sequence number range we'll renumber locally. If this limit is exceeded, we'll
// switch to a full function renumbering.
/// Limit on the sequence number range we'll renumber locally. If this limit is exceeded, we'll
/// switch to a full function renumbering.
const LOCAL_LIMIT: SequenceNumber = 100 * MINOR_STRIDE;
// Compute the midpoint between `a` and `b`.
// Return `None` if the midpoint would be equal to either.
/// Compute the midpoint between `a` and `b`.
/// Return `None` if the midpoint would be equal to either.
fn midpoint(a: SequenceNumber, b: SequenceNumber) -> Option<SequenceNumber> {
debug_assert!(a < b);
// Avoid integer overflow.
@@ -428,7 +427,7 @@ impl Layout {
}
/// Return an iterator over all EBBs in layout order.
pub fn ebbs<'f>(&'f self) -> Ebbs<'f> {
pub fn ebbs(&self) -> Ebbs {
Ebbs {
layout: self,
next: self.first_ebb,
@@ -611,7 +610,7 @@ impl Layout {
}
/// Iterate over the instructions in `ebb` in layout order.
pub fn ebb_insts<'f>(&'f self, ebb: Ebb) -> Insts<'f> {
pub fn ebb_insts(&self, ebb: Ebb) -> Insts {
Insts {
layout: self,
head: self.ebbs[ebb].first_inst.into(),
@@ -735,11 +734,10 @@ impl<'f> DoubleEndedIterator for Insts<'f> {
}
}
#[cfg(test)]
mod tests {
use cursor::{Cursor, CursorPosition};
use super::Layout;
use cursor::{Cursor, CursorPosition};
use entity::EntityRef;
use ir::{Ebb, Inst, ProgramOrder, SourceLoc};
use std::cmp::Ordering;

View File

@@ -6,7 +6,7 @@ use std::str::FromStr;
/// The name of a runtime library routine.
///
/// Runtime library calls are generated for Cretonne IL instructions that don't have an equivalent
/// Runtime library calls are generated for Cretonne IR instructions that don't have an equivalent
/// ISA instruction or an easy macro expansion. A `LibCall` is used as a well-known name to refer to
/// the runtime library routine. This way, Cretonne doesn't have to know about the naming
/// convention in the embedding VM's runtime library.

View File

@@ -1,50 +1,50 @@
//! Representation of Cretonne IL functions.
//! Representation of Cretonne IR functions.
pub mod types;
pub mod entities;
pub mod condcodes;
pub mod immediates;
pub mod instructions;
pub mod stackslot;
pub mod jumptable;
pub mod dfg;
pub mod layout;
pub mod function;
mod builder;
pub mod condcodes;
pub mod dfg;
pub mod entities;
mod extfunc;
mod extname;
pub mod function;
mod globalvar;
mod heap;
pub mod immediates;
pub mod instructions;
pub mod jumptable;
pub mod layout;
mod libcall;
mod memflags;
mod progpoint;
mod sourceloc;
pub mod stackslot;
mod trapcode;
pub mod types;
mod valueloc;
pub use ir::builder::{InstBuilder, InstBuilderBase, InstInserterBase, InsertBuilder};
pub use ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase};
pub use ir::dfg::{DataFlowGraph, ValueDef};
pub use ir::entities::{Ebb, Inst, Value, StackSlot, GlobalVar, JumpTable, FuncRef, SigRef, Heap};
pub use ir::extfunc::{Signature, CallConv, AbiParam, ArgumentExtension, ArgumentPurpose,
ExtFuncData};
pub use ir::entities::{Ebb, FuncRef, GlobalVar, Heap, Inst, JumpTable, SigRef, StackSlot, Value};
pub use ir::extfunc::{AbiParam, ArgumentExtension, ArgumentPurpose, CallConv, ExtFuncData,
Signature};
pub use ir::extname::ExternalName;
pub use ir::function::Function;
pub use ir::globalvar::GlobalVarData;
pub use ir::heap::{HeapData, HeapStyle, HeapBase};
pub use ir::instructions::{Opcode, InstructionData, VariableArgs, ValueList, ValueListPool};
pub use ir::heap::{HeapBase, HeapData, HeapStyle};
pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs};
pub use ir::jumptable::JumpTableData;
pub use ir::layout::Layout;
pub use ir::libcall::LibCall;
pub use ir::memflags::MemFlags;
pub use ir::progpoint::{ProgramPoint, ProgramOrder, ExpandedProgramPoint};
pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
pub use ir::sourceloc::SourceLoc;
pub use ir::stackslot::{StackSlots, StackSlotKind, StackSlotData};
pub use ir::stackslot::{StackSlotData, StackSlotKind, StackSlots};
pub use ir::trapcode::TrapCode;
pub use ir::types::Type;
pub use ir::valueloc::{ValueLoc, ArgumentLoc};
pub use ir::valueloc::{ArgumentLoc, ValueLoc};
use binemit;
use entity::{PrimaryMap, EntityMap};
use entity::{EntityMap, PrimaryMap};
use isa;
/// Map of value locations.

View File

@@ -2,9 +2,9 @@
use entity::EntityRef;
use ir::{Ebb, Inst, ValueDef};
use std::cmp;
use std::fmt;
use std::u32;
use std::cmp;
/// A `ProgramPoint` represents a position in a function where the live range of an SSA value can
/// begin or end. It can be either:
@@ -12,7 +12,7 @@ use std::cmp;
/// 1. An instruction or
/// 2. An EBB header.
///
/// This corresponds more or less to the lines in the textual representation of Cretonne IL.
/// This corresponds more or less to the lines in the textual form of Cretonne IR.
#[derive(PartialEq, Eq, Clone, Copy)]
pub struct ProgramPoint(u32);
@@ -147,7 +147,7 @@ pub trait ProgramOrder {
mod tests {
use super::*;
use entity::EntityRef;
use ir::{Inst, Ebb};
use ir::{Ebb, Inst};
use std::string::ToString;
#[test]

View File

@@ -7,7 +7,7 @@ use std::fmt;
/// A source location.
///
/// This is an opaque 32-bit number attached to each Cretonne IL instruction. Cretonne does not
/// This is an opaque 32-bit number attached to each Cretonne IR instruction. Cretonne does not
/// interpret source locations in any way, they are simply preserved from the input to the output.
///
/// The default source location uses the all-ones bit pattern `!0`. It is used for instructions

View File

@@ -3,12 +3,13 @@
//! The `StackSlotData` struct keeps track of a single stack slot in a function.
//!
use entity::{PrimaryMap, Keys};
use ir::{Type, StackSlot};
use entity::{Iter, IterMut, Keys, PrimaryMap};
use ir::{StackSlot, Type};
use packed_option::PackedOption;
use std::cmp;
use std::fmt;
use std::ops::{Index, IndexMut};
use std::slice;
use std::str::FromStr;
use std::vec::Vec;
@@ -208,6 +209,26 @@ impl StackSlots {
self.slots[ss].offset = Some(offset);
}
/// Get an iterator over all the stack slot keys.
pub fn iter(&self) -> Iter<StackSlot, StackSlotData> {
self.slots.iter()
}
/// Get an iterator over all the stack slot keys, mutable edition.
pub fn iter_mut(&mut self) -> IterMut<StackSlot, StackSlotData> {
self.slots.iter_mut()
}
/// Get an iterator over all the stack slot records.
pub fn values(&self) -> slice::Iter<StackSlotData> {
self.slots.values()
}
/// Get an iterator over all the stack slot records, mutable edition.
pub fn values_mut(&mut self) -> slice::IterMut<StackSlotData> {
self.slots.values_mut()
}
/// Get an iterator over all the stack slot keys.
pub fn keys(&self) -> Keys<StackSlot> {
self.slots.keys()
@@ -317,9 +338,9 @@ impl StackSlots {
#[cfg(test)]
mod tests {
use super::*;
use ir::Function;
use ir::types;
use super::*;
use std::string::ToString;
#[test]

View File

@@ -38,6 +38,10 @@ pub enum TrapCode {
/// Failed float-to-int conversion.
BadConversionToInteger,
/// Execution has potentially run too long and may be interrupted.
/// This trap is resumable.
Interrupt,
/// A user-defined trap code.
User(u16),
}
@@ -54,6 +58,7 @@ impl Display for TrapCode {
IntegerOverflow => "int_ovf",
IntegerDivisionByZero => "int_divz",
BadConversionToInteger => "bad_toint",
Interrupt => "interrupt",
User(x) => return write!(f, "user{}", x),
};
f.write_str(identifier)
@@ -74,6 +79,7 @@ impl FromStr for TrapCode {
"int_ovf" => Ok(IntegerOverflow),
"int_divz" => Ok(IntegerDivisionByZero),
"bad_toint" => Ok(BadConversionToInteger),
"interrupt" => Ok(Interrupt),
_ if s.starts_with("user") => s[4..].parse().map(User).map_err(|_| ()),
_ => Err(()),
}

View File

@@ -1,7 +1,7 @@
//! Common types for the Cretonne code generator.
use std::default::Default;
use std::fmt::{self, Display, Debug, Formatter};
use std::fmt::{self, Debug, Display, Formatter};
/// The type of an SSA value.
///
@@ -24,10 +24,10 @@ pub struct Type(u8);
/// a SIMD vector.
pub const VOID: Type = Type(0);
// Start of the lane types. See also `meta/cdsl.types.py`.
/// Start of the lane types. See also `meta/cdsl.types.py`.
const LANE_BASE: u8 = 0x70;
// Start of the 2-lane vector types.
/// Start of the 2-lane vector types.
const VECTOR_BASE: u8 = LANE_BASE + 16;
// Include code generated by `lib/cretonne/meta/gen_types.py`. This file contains constant

View File

@@ -3,8 +3,8 @@
//! The register allocator assigns every SSA value to either a register or a stack slot. This
//! assignment is represented by a `ValueLoc` object.
use isa::{RegInfo, RegUnit};
use ir::StackSlot;
use isa::{RegInfo, RegUnit};
use std::fmt;
/// Value location.

View File

@@ -1,10 +1,10 @@
//! ARM ABI implementation.
use super::registers::{D, GPR, Q, S};
use ir;
use isa::RegClass;
use regalloc::AllocatableSet;
use settings as shared_settings;
use super::registers::{S, D, Q, GPR};
/// Legalize `sig`.
pub fn legalize_signature(

View File

@@ -1,6 +1,6 @@
//! Emitting binary ARM32 machine code.
use binemit::{CodeSink, bad_encoding};
use binemit::{bad_encoding, CodeSink};
use ir::{Function, Inst};
use regalloc::RegDiversions;

View File

@@ -1,20 +1,20 @@
//! ARM 32-bit Instruction Set Architecture.
pub mod settings;
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use binemit::{CodeSink, MemoryCodeSink, emit_function};
use super::super::settings as shared_settings;
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use isa::Builder as IsaBuilder;
use isa::{TargetIsa, RegInfo, RegClass, EncInfo};
use binemit::{emit_function, CodeSink, MemoryCodeSink};
use ir;
use isa::Builder as IsaBuilder;
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use regalloc;
use std::fmt;
use std::boxed::Box;
use std::fmt;
#[allow(dead_code)]
struct Isa {

View File

@@ -6,7 +6,7 @@ include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
#[cfg(test)]
mod tests {
use super::{INFO, GPR, S, D};
use super::{D, GPR, INFO, S};
use isa::RegUnit;
use std::string::{String, ToString};

View File

@@ -1,10 +1,10 @@
//! ARM 64 ABI implementation.
use super::registers::{FPR, GPR};
use ir;
use isa::RegClass;
use regalloc::AllocatableSet;
use settings as shared_settings;
use super::registers::{GPR, FPR};
/// Legalize `sig`.
pub fn legalize_signature(

View File

@@ -1,6 +1,6 @@
//! Emitting binary ARM64 machine code.
use binemit::{CodeSink, bad_encoding};
use binemit::{bad_encoding, CodeSink};
use ir::{Function, Inst};
use regalloc::RegDiversions;

View File

@@ -1,20 +1,20 @@
//! ARM 64-bit Instruction Set Architecture.
pub mod settings;
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use binemit::{CodeSink, MemoryCodeSink, emit_function};
use super::super::settings as shared_settings;
use isa::enc_tables::{lookup_enclist, Encodings};
use isa::Builder as IsaBuilder;
use isa::{TargetIsa, RegInfo, RegClass, EncInfo};
use binemit::{emit_function, CodeSink, MemoryCodeSink};
use ir;
use isa::Builder as IsaBuilder;
use isa::enc_tables::{lookup_enclist, Encodings};
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use regalloc;
use std::fmt;
use std::boxed::Box;
use std::fmt;
#[allow(dead_code)]
struct Isa {

View File

@@ -8,8 +8,8 @@
//! are satisfied.
use binemit::CodeOffset;
use ir::{Function, Inst, ValueLoc};
use isa::{RegClass, RegUnit};
use ir::{Function, ValueLoc, Inst};
use regalloc::RegDiversions;
/// Register constraint for a single value operand or instruction result.
@@ -205,6 +205,5 @@ mod tests {
// Backward limit
assert!(t1.contains(1000, 748));
assert!(!t1.contains(1000, 746));
}
}

View File

@@ -3,8 +3,8 @@
//! This module contains types and functions for working with the encoding tables generated by
//! `lib/cretonne/meta/gen_encoding.py`.
use constant_hash::{Table, probe};
use ir::{Type, Opcode, DataFlowGraph, InstructionData};
use constant_hash::{probe, Table};
use ir::{DataFlowGraph, InstructionData, Opcode, Type};
use isa::{Encoding, Legalize};
use settings::PredicateView;
use std::ops::Range;

View File

@@ -1,7 +1,7 @@
//! The `Encoding` struct.
use binemit::CodeOffset;
use isa::constraints::{RecipeConstraints, BranchRange};
use isa::constraints::{BranchRange, RecipeConstraints};
use std::fmt;
/// Bits needed to encode an instruction as binary machine code.

View File

@@ -1,19 +1,18 @@
//! Intel ABI implementation.
use super::registers::{FPR, GPR, RU};
use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
use cursor::{Cursor, CursorPosition, EncCursor};
use ir;
use ir::immediates::Imm64;
use ir::stackslot::{StackOffset, StackSize};
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, CallConv, InstBuilder};
use isa::{RegClass, RegUnit, TargetIsa};
use regalloc::AllocatableSet;
use result;
use settings as shared_settings;
use super::registers::{GPR, FPR, RU};
use abi::{ArgAction, ValueConversion, ArgAssigner, legalize_args};
use ir::{AbiParam, ArgumentPurpose, ArgumentLoc, ArgumentExtension, CallConv, InstBuilder};
use ir::stackslot::{StackSize, StackOffset};
use ir::immediates::Imm64;
use stack_layout::layout_stack;
use std::i32;
use cursor::{Cursor, EncCursor, CursorPosition};
use result;
/// Argument registers for x86-64
static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];
@@ -171,7 +170,7 @@ pub fn callee_saved_registers(flags: &shared_settings::Flags) -> &'static [RU] {
pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
match func.signature.call_conv {
ir::CallConv::Native => native_prologue_epilogue(func, isa),
ir::CallConv::SystemV => system_v_prologue_epilogue(func, isa),
ir::CallConv::SpiderWASM => spiderwasm_prologue_epilogue(func, isa),
}
}
@@ -194,7 +193,7 @@ pub fn spiderwasm_prologue_epilogue(
}
/// Insert a System V-compatible prologue and epilogue.
pub fn native_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
// The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
// newer versions use a 16-byte aligned stack pointer.
let stack_align = 16;
@@ -242,17 +241,17 @@ pub fn native_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> res
// Set up the cursor and insert the prologue
let entry_ebb = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
insert_native_prologue(&mut pos, local_stack_size, csr_type, csrs);
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, csrs);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_native_epilogues(&mut pos, local_stack_size, csr_type, csrs);
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, csrs);
Ok(())
}
/// Insert the prologue for a given function.
fn insert_native_prologue(
fn insert_system_v_prologue(
pos: &mut EncCursor,
stack_size: i64,
csr_type: ir::types::Type,
@@ -286,7 +285,7 @@ fn insert_native_prologue(
}
/// Find all `return` instructions and insert epilogues before them.
fn insert_native_epilogues(
fn insert_system_v_epilogues(
pos: &mut EncCursor,
stack_size: i64,
csr_type: ir::types::Type,
@@ -296,14 +295,14 @@ fn insert_native_epilogues(
pos.goto_last_inst(ebb);
if let Some(inst) = pos.current_inst() {
if pos.func.dfg[inst].opcode().is_return() {
insert_native_epilogue(inst, stack_size, pos, csr_type, csrs);
insert_system_v_epilogue(inst, stack_size, pos, csr_type, csrs);
}
}
}
}
/// Insert an epilogue given a specific `return` instruction.
fn insert_native_epilogue(
fn insert_system_v_epilogue(
inst: ir::Inst,
stack_size: i64,
pos: &mut EncCursor,

View File

@@ -1,11 +1,11 @@
//! Emitting binary Intel machine code.
use binemit::{CodeSink, Reloc, bad_encoding};
use ir::{Function, Inst, Ebb, InstructionData, Opcode};
use ir::condcodes::{CondCode, IntCC, FloatCC};
use isa::{RegUnit, StackRef, StackBase, StackBaseMask};
use regalloc::RegDiversions;
use super::registers::RU;
use binemit::{bad_encoding, CodeSink, Reloc};
use ir::condcodes::{CondCode, FloatCC, IntCC};
use ir::{Ebb, Function, Inst, InstructionData, Opcode, TrapCode};
use isa::{RegUnit, StackBase, StackBaseMask, StackRef};
use regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-intel.rs"));
@@ -257,7 +257,7 @@ fn icc2opc(cond: IntCC) -> u16 {
/// Get the low 4 bits of an opcode for a floating point condition code.
///
/// The ucomiss/ucomisd instructions set the EFLAGS bits CF/PF/CF like this:
/// The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
///
/// ZPC OSA
/// UN 111 000

View File

@@ -1,16 +1,16 @@
//! Encoding tables for Intel ISAs.
use super::registers::*;
use bitset::BitSet;
use cursor::{Cursor, FuncCursor};
use flowgraph::ControlFlowGraph;
use ir::{self, InstBuilder};
use ir::condcodes::IntCC;
use ir::{self, InstBuilder};
use isa;
use isa::constraints::*;
use isa::enc_tables::*;
use isa::encoding::RecipeSizing;
use isa;
use predicates;
use super::registers::*;
include!(concat!(env!("OUT_DIR"), "/encoding-intel.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-intel.rs"));
@@ -22,7 +22,6 @@ fn expand_sdivrem(
cfg: &mut ControlFlowGraph,
isa: &isa::TargetIsa,
) {
let (x, y, is_srem) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Sdiv,
@@ -113,7 +112,6 @@ fn expand_udivrem(
_cfg: &mut ControlFlowGraph,
isa: &isa::TargetIsa,
) {
let (x, y, is_urem) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Udiv,
@@ -324,7 +322,7 @@ fn expand_fcvt_to_sint(
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use ir::condcodes::{IntCC, FloatCC};
use ir::condcodes::{FloatCC, IntCC};
use ir::immediates::{Ieee32, Ieee64};
let x;
@@ -423,7 +421,7 @@ fn expand_fcvt_to_uint(
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use ir::condcodes::{IntCC, FloatCC};
use ir::condcodes::{FloatCC, IntCC};
use ir::immediates::{Ieee32, Ieee64};
let x;

View File

@@ -1,22 +1,22 @@
//! Intel Instruction Set Architectures.
pub mod settings;
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use binemit::{CodeSink, MemoryCodeSink, emit_function};
use super::super::settings as shared_settings;
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use isa::Builder as IsaBuilder;
use isa::{TargetIsa, RegInfo, RegClass, EncInfo};
use binemit::{emit_function, CodeSink, MemoryCodeSink};
use ir;
use isa::Builder as IsaBuilder;
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use regalloc;
use result;
use timing;
use std::fmt;
use std::boxed::Box;
use std::fmt;
use timing;
#[allow(dead_code)]
struct Isa {
@@ -58,6 +58,10 @@ impl TargetIsa for Isa {
&self.shared_flags
}
fn uses_cpu_flags(&self) -> bool {
true
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}

View File

@@ -40,21 +40,21 @@
//! The configured target ISA trait object is a `Box<TargetIsa>` which can be used for multiple
//! concurrent function compilations.
pub use isa::constraints::{RecipeConstraints, OperandConstraint, ConstraintKind, BranchRange};
pub use isa::encoding::{Encoding, EncInfo};
pub use isa::registers::{RegInfo, RegUnit, RegClass, RegClassIndex, regs_overlap};
pub use isa::constraints::{BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints};
pub use isa::encoding::{EncInfo, Encoding};
pub use isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
pub use isa::stack::{StackBase, StackBaseMask, StackRef};
use binemit;
use flowgraph;
use settings;
use ir;
use isa::enc_tables::Encodings;
use regalloc;
use result;
use timing;
use isa::enc_tables::Encodings;
use std::fmt;
use settings;
use std::boxed::Box;
use std::fmt;
use timing;
#[cfg(build_riscv)]
mod riscv;
@@ -68,28 +68,26 @@ mod arm32;
#[cfg(build_arm64)]
mod arm64;
pub mod registers;
mod encoding;
mod enc_tables;
mod constraints;
mod enc_tables;
mod encoding;
pub mod registers;
mod stack;
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::Unsupported)` if not enabled.
macro_rules! isa_builder {
($module:ident, $name:ident) => {
{
#[cfg($name)]
fn $name() -> Result<Builder, LookupError> {
Ok($module::isa_builder())
};
#[cfg(not($name))]
fn $name() -> Result<Builder, LookupError> {
Err(LookupError::Unsupported)
}
$name()
($module:ident, $name:ident) => {{
#[cfg($name)]
fn $name() -> Result<Builder, LookupError> {
Ok($module::isa_builder())
};
#[cfg(not($name))]
fn $name() -> Result<Builder, LookupError> {
Err(LookupError::Unsupported)
}
};
$name()
}};
}
/// Look for a supported ISA with the given `name`.
@@ -158,6 +156,11 @@ pub trait TargetIsa: fmt::Display {
/// Get the ISA-independent flags that were used to make this trait object.
fn flags(&self) -> &settings::Flags;
/// Does the CPU implement scalar comparisons using a CPU flags register?
fn uses_cpu_flags(&self) -> bool {
false
}
/// Get a data structure describing the registers in this ISA.
fn register_info(&self) -> RegInfo;
@@ -243,8 +246,8 @@ pub trait TargetIsa: fmt::Display {
fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult {
let _tt = timing::prologue_epilogue();
// This default implementation is unlikely to be good enough.
use ir::stackslot::{StackOffset, StackSize};
use stack_layout::layout_stack;
use ir::stackslot::{StackSize, StackOffset};
let word_size = if self.flags().is_64bit() { 8 } else { 4 };

View File

@@ -5,13 +5,13 @@
//!
//! This doesn't support the soft-float ABI at the moment.
use abi::{ArgAction, ValueConversion, ArgAssigner, legalize_args};
use ir::{self, Type, AbiParam, ArgumentLoc, ArgumentExtension, ArgumentPurpose};
use super::registers::{FPR, GPR};
use super::settings;
use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
use ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
use isa::RegClass;
use regalloc::AllocatableSet;
use settings as shared_settings;
use super::registers::{GPR, FPR};
use super::settings;
use std::i32;
struct Args {

View File

@@ -1,8 +1,8 @@
//! Emitting binary RISC-V machine code.
use binemit::{CodeSink, Reloc, bad_encoding};
use binemit::{bad_encoding, CodeSink, Reloc};
use ir::{Function, Inst, InstructionData};
use isa::{RegUnit, StackRef, StackBaseMask};
use isa::{RegUnit, StackBaseMask, StackRef};
use predicates::is_signed_int;
use regalloc::RegDiversions;
use std::u32;

View File

@@ -1,12 +1,12 @@
//! Encoding tables for RISC-V.
use super::registers::*;
use ir;
use isa;
use isa::constraints::*;
use isa::enc_tables::*;
use isa::encoding::RecipeSizing;
use predicates;
use super::registers::*;
// Include the generated encoding tables:
// - `LEVEL1_RV32`

View File

@@ -1,20 +1,20 @@
//! RISC-V Instruction Set Architecture.
pub mod settings;
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
use binemit::{CodeSink, MemoryCodeSink, emit_function};
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use isa::Builder as IsaBuilder;
use isa::{TargetIsa, RegInfo, RegClass, EncInfo};
use binemit::{emit_function, CodeSink, MemoryCodeSink};
use ir;
use isa::Builder as IsaBuilder;
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use regalloc;
use std::fmt;
use std::boxed::Box;
use std::fmt;
#[allow(dead_code)]
struct Isa {
@@ -113,10 +113,10 @@ impl TargetIsa for Isa {
#[cfg(test)]
mod tests {
use settings::{self, Configurable};
use isa;
use ir::{DataFlowGraph, InstructionData, Opcode};
use ir::{types, immediates};
use ir::{immediates, types};
use isa;
use settings::{self, Configurable};
use std::string::{String, ToString};
fn encstr(isa: &isa::TargetIsa, enc: Result<isa::Encoding, isa::Legalize>) -> String {

View File

@@ -6,7 +6,7 @@ include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs"));
#[cfg(test)]
mod tests {
use super::{INFO, GPR, FPR};
use super::{FPR, GPR, INFO};
use isa::RegUnit;
use std::string::{String, ToString};

View File

@@ -22,12 +22,12 @@ mod tests {
assert_eq!(
f.to_string(),
"[riscv]\n\
supports_m = false\n\
supports_a = false\n\
supports_f = false\n\
supports_d = false\n\
enable_m = true\n\
enable_e = false\n"
supports_m = false\n\
supports_a = false\n\
supports_f = false\n\
supports_d = false\n\
enable_m = true\n\
enable_e = false\n"
);
// Predicates are not part of the Display output.
assert_eq!(f.full_float(), false);

View File

@@ -4,8 +4,8 @@
//! defined in this module expresses the low-level details of accessing a stack slot from an
//! encoded instruction.
use ir::stackslot::{StackSlots, StackOffset, StackSlotKind};
use ir::StackSlot;
use ir::stackslot::{StackOffset, StackSlotKind, StackSlots};
/// A method for referencing a stack slot in the current stack frame.
///
@@ -68,6 +68,8 @@ pub enum StackBase {
FP = 1,
/// Use an explicit zone pointer in a general-purpose register.
///
/// This feature is not yet implemented.
Zone = 2,
}

View File

@@ -20,9 +20,9 @@
use abi::{legalize_abi_value, ValueConversion};
use cursor::{Cursor, FuncCursor};
use flowgraph::ControlFlowGraph;
use ir::{Function, DataFlowGraph, Inst, InstBuilder, Ebb, Type, Value, Signature, SigRef,
AbiParam, ArgumentPurpose, ArgumentLoc, ValueLoc};
use ir::instructions::CallInfo;
use ir::{AbiParam, ArgumentLoc, ArgumentPurpose, DataFlowGraph, Ebb, Function, Inst, InstBuilder,
SigRef, Signature, Type, Value, ValueLoc};
use isa::TargetIsa;
use legalizer::split::{isplit, vsplit};
use std::vec::Vec;
@@ -35,9 +35,9 @@ use std::vec::Vec;
pub fn legalize_signatures(func: &mut Function, isa: &TargetIsa) {
isa.legalize_signature(&mut func.signature, true);
func.signature.compute_argument_bytes();
for sig in func.dfg.signatures.keys() {
isa.legalize_signature(&mut func.dfg.signatures[sig], false);
func.dfg.signatures[sig].compute_argument_bytes();
for sig_data in func.dfg.signatures.values_mut() {
isa.legalize_signature(sig_data, false);
sig_data.compute_argument_bytes();
}
if let Some(entry) = func.layout.entry_block() {

View File

@@ -45,15 +45,18 @@ fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function, offset: i64) {
/// Expand a `global_addr` instruction for a deref global.
fn deref_addr(inst: ir::Inst, func: &mut ir::Function, base: ir::GlobalVar, offset: i64) {
// We need to load a pointer from the `base` global variable, so insert a new `global_addr`
// instruction. This depends on the iterative legalization loop. Note that the IL verifier
// instruction. This depends on the iterative legalization loop. Note that the IR verifier
// detects any cycles in the `deref` globals.
let ptr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let base_addr = pos.ins().global_addr(ptr_ty, base);
// TODO: We could probably set both `notrap` and `aligned` on this load instruction.
let base_ptr = pos.ins().load(ptr_ty, ir::MemFlags::new(), base_addr, 0);
let mut mflags = ir::MemFlags::new();
// Deref globals are required to be accessible and aligned.
mflags.set_notrap();
mflags.set_aligned();
let base_ptr = pos.ins().load(ptr_ty, mflags, base_addr, 0);
pos.func.dfg.replace(inst).iadd_imm(base_ptr, offset);
}

View File

@@ -5,8 +5,8 @@
use cursor::{Cursor, FuncCursor};
use flowgraph::ControlFlowGraph;
use ir::{self, InstBuilder, MemFlags};
use ir::condcodes::IntCC;
use ir::{self, InstBuilder, MemFlags};
use isa::TargetIsa;
/// Expand a `heap_addr` instruction according to the definition of the heap.
@@ -58,7 +58,11 @@ fn dynamic_addr(
// Start with the bounds check. Trap if `offset + size > bound`.
let bound_addr = pos.ins().global_addr(addr_ty, bound_gv);
let bound = pos.ins().load(offset_ty, MemFlags::new(), bound_addr, 0);
let mut mflags = MemFlags::new();
// The bound variable is required to be accessible and aligned.
mflags.set_notrap();
mflags.set_aligned();
let bound = pos.ins().load(offset_ty, mflags, bound_addr, 0);
let oob;
if size == 1 {
@@ -175,7 +179,11 @@ fn offset_addr(
ir::HeapBase::ReservedReg => unimplemented!(),
ir::HeapBase::GlobalVar(base_gv) => {
let base_addr = pos.ins().global_addr(addr_ty, base_gv);
let base = pos.ins().load(addr_ty, MemFlags::new(), base_addr, 0);
let mut mflags = MemFlags::new();
// The base address variable is required to be accessible and aligned.
mflags.set_notrap();
mflags.set_aligned();
let base = pos.ins().load(addr_ty, mflags, base_addr, 0);
pos.func.dfg.replace(inst).iadd(base, offset);
}
}

View File

@@ -29,8 +29,8 @@ pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function) -> bool {
fn find_funcref(libcall: ir::LibCall, func: &ir::Function) -> Option<ir::FuncRef> {
// We're assuming that all libcall function decls are at the end.
// If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
for fref in func.dfg.ext_funcs.keys().rev() {
match func.dfg.ext_funcs[fref].name {
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
match func_data.name {
ir::ExternalName::LibCall(lc) => {
if lc == libcall {
return Some(fref);
@@ -44,8 +44,8 @@ fn find_funcref(libcall: ir::LibCall, func: &ir::Function) -> Option<ir::FuncRef
/// Create a funcref for `libcall` with a signature matching `inst`.
fn make_funcref(libcall: ir::LibCall, inst: ir::Inst, func: &mut ir::Function) -> ir::FuncRef {
// Start with a native calling convention. We'll give the ISA a chance to change it.
let mut sig = ir::Signature::new(ir::CallConv::Native);
// Start with a system_v calling convention. We'll give the ISA a chance to change it.
let mut sig = ir::Signature::new(ir::CallConv::SystemV);
for &v in func.dfg.inst_args(inst) {
sig.params.push(ir::AbiParam::new(func.dfg.value_type(v)));
}

View File

@@ -13,11 +13,11 @@
//! The legalizer does not deal with register allocation constraints. These constraints are derived
//! from the encoding recipes, and solved later by the register allocator.
use bitset::BitSet;
use cursor::{Cursor, FuncCursor};
use flowgraph::ControlFlowGraph;
use ir::{self, InstBuilder};
use isa::TargetIsa;
use bitset::BitSet;
use timing;
mod boundary;
@@ -56,28 +56,24 @@ pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, is
let opcode = pos.func.dfg[inst].opcode();
// Check for ABI boundaries that need to be converted to the legalized signature.
if opcode.is_call() && boundary::handle_call_abi(inst, pos.func, cfg) {
// Go back and legalize the inserted argument conversion instructions.
pos.set_position(prev_pos);
continue;
}
if opcode.is_return() && boundary::handle_return_abi(inst, pos.func, cfg) {
// Go back and legalize the inserted return value conversion instructions.
pos.set_position(prev_pos);
continue;
}
if opcode.is_branch() {
if opcode.is_call() {
if boundary::handle_call_abi(inst, pos.func, cfg) {
// Go back and legalize the inserted argument conversion instructions.
pos.set_position(prev_pos);
continue;
}
} else if opcode.is_return() {
if boundary::handle_return_abi(inst, pos.func, cfg) {
// Go back and legalize the inserted return value conversion instructions.
pos.set_position(prev_pos);
continue;
}
} else if opcode.is_branch() {
split::simplify_branch_arguments(&mut pos.func.dfg, inst);
}
match isa.encode(
&pos.func.dfg,
&pos.func.dfg[inst],
pos.func.dfg.ctrl_typevar(inst),
) {
Ok(encoding) => pos.func.encodings[inst] = encoding,
match pos.func.update_encoding(inst, isa) {
Ok(()) => {}
Err(action) => {
// We should transform the instruction into legal equivalents.
let changed = action(inst, pos.func, cfg, isa);
@@ -239,7 +235,6 @@ fn expand_select(
cfg.recompute_ebb(pos.func, old_ebb);
}
/// Expand illegal `f32const` and `f64const` instructions.
fn expand_fconst(
inst: ir::Inst,

View File

@@ -66,7 +66,7 @@
use cursor::{Cursor, CursorPosition, FuncCursor};
use flowgraph::ControlFlowGraph;
use ir::{self, Ebb, Inst, Value, Type, Opcode, ValueDef, InstructionData, InstBuilder};
use ir::{self, Ebb, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef};
use std::iter;
use std::vec::Vec;
@@ -229,7 +229,6 @@ fn split_value(
let hi = pos.func.dfg.append_ebb_param(ebb, split_type);
reuse = Some((lo, hi));
// Now the original value is dangling. Insert a concatenation instruction that can
// compute it from the two new parameters. This also serves as a record of what we
// did so a future call to this function doesn't have to redo the work.

View File

@@ -1,8 +1,34 @@
//! Cretonne code generation library.
#![deny(missing_docs,
trivial_numeric_casts,
unused_extern_crates)]
#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
#![cfg_attr(feature="cargo-clippy", allow(
// Rustfmt 0.9.0 is at odds with this lint:
block_in_if_condition_stmt,
// Produces only a false positive:
while_let_loop,
// Produces many false positives, but did produce some valid lints, now fixed:
needless_lifetimes,
// Generated code makes some style transgressions, but readability doesn't suffer much:
many_single_char_names,
identity_op,
needless_borrow,
cast_lossless,
unreadable_literal,
assign_op_pattern,
empty_line_after_outer_attr,
// Hard to avoid in generated code:
cyclomatic_complexity,
too_many_arguments,
// Code generator doesn't have a way to collapse identical arms:
match_same_arms,
// These are relatively minor style issues, but would be easy to fix:
new_without_default,
new_without_default_derive,
should_implement_trait,
redundant_field_names,
useless_let_if_seq,
len_without_is_empty))]
// Turns on no_std and alloc features if std is not available.
#![cfg_attr(not(feature = "std"), no_std)]
@@ -25,7 +51,7 @@ pub use verifier::verify_function;
pub use write::write_function;
/// Version number of the cretonne crate.
pub const VERSION: &'static str = env!("CARGO_PKG_VERSION");
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
#[macro_use]
pub mod dbg;
@@ -34,6 +60,7 @@ pub mod entity;
pub mod bforest;
pub mod binemit;
pub mod cfg_printer;
pub mod cursor;
pub mod dominator_tree;
pub mod flowgraph;
@@ -41,6 +68,7 @@ pub mod ir;
pub mod isa;
pub mod loop_analysis;
pub mod packed_option;
pub mod print_errors;
pub mod result;
pub mod settings;
pub mod timing;
@@ -50,11 +78,13 @@ mod abi;
mod bitset;
mod constant_hash;
mod context;
mod dce;
mod divconst_magic_numbers;
mod iterators;
mod legalizer;
mod licm;
mod partition_slice;
mod postopt;
mod predicates;
mod preopt;
mod ref_slice;

View File

@@ -1,14 +1,14 @@
//! A Loop Invariant Code Motion optimization pass
use cursor::{Cursor, FuncCursor};
use ir::{Function, Ebb, Inst, Value, Type, InstBuilder, Layout};
use flowgraph::ControlFlowGraph;
use std::collections::HashSet;
use dominator_tree::DominatorTree;
use entity::{EntityList, ListPool};
use flowgraph::ControlFlowGraph;
use ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
use loop_analysis::{Loop, LoopAnalysis};
use timing;
use std::collections::HashSet;
use std::vec::Vec;
use timing;
/// Performs the LICM pass by detecting loops within the CFG and moving
/// loop-invariant instructions out of them.
@@ -27,10 +27,10 @@ pub fn do_licm(
for lp in loop_analysis.loops() {
// For each loop that we want to optimize we determine the set of loop-invariant
// instructions
let invariant_inst = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
let invariant_insts = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
// Then we create the loop's pre-header and fill it with the invariant instructions
// Then we remove the invariant instructions from the loop body
if !invariant_inst.is_empty() {
if !invariant_insts.is_empty() {
// If the loop has a natural pre-header we use it, otherwise we create it.
let mut pos;
match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) {
@@ -47,7 +47,7 @@ pub fn do_licm(
};
// The last instruction of the pre-header is the termination instruction (usually
// a jump) so we need to insert just before this.
for inst in invariant_inst {
for inst in invariant_insts {
pos.insert_inst(inst);
}
}
@@ -121,7 +121,6 @@ fn has_pre_header(
result
}
// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump
// or non-branch instruction.
fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
@@ -131,6 +130,29 @@ fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function)
}
}
/// Test whether the given opcode is unsafe to even consider for LICM.
///
/// An opcode is rejected outright when hoisting it could change observable
/// behavior: memory accesses, calls, control flow (branches, terminators,
/// returns), possible traps, other side effects, or CPU flag writes.
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
    // Memory accesses and calls may observe or mutate state.
    if opcode.can_load() || opcode.can_store() || opcode.is_call() {
        return true;
    }
    // Control-flow instructions must stay where they are.
    if opcode.is_branch() || opcode.is_terminator() || opcode.is_return() {
        return true;
    }
    // Trapping instructions and other side effects are position-sensitive.
    opcode.can_trap() || opcode.other_side_effects() || opcode.writes_cpu_flags()
}
/// Test whether the given instruction is loop-invariant.
///
/// An instruction is considered loop-invariant when:
/// 1. its opcode is safe to hoist at all (see `trivially_unsafe_for_licm`), and
/// 2. none of its arguments — after resolving value aliases — is a value
///    defined inside the loop (i.e. present in `loop_values`).
///
/// Returns `true` if the instruction may be hoisted into the pre-header.
fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &HashSet<Value>) -> bool {
    if trivially_unsafe_for_licm(dfg[inst].opcode()) {
        return false;
    }
    // Invariant exactly when every argument resolves to a value produced
    // outside the loop.
    dfg.inst_args(inst)
        .iter()
        .all(|&arg| !loop_values.contains(&dfg.resolve_aliases(arg)))
}
// Traverses a loop in reverse post-order from a header EBB and identifies loop-invariant
// instructions. These loop-invariant instructions are then removed from the code and returned
// (in reverse post-order) for later use.
@@ -141,7 +163,7 @@ fn remove_loop_invariant_instructions(
loop_analysis: &LoopAnalysis,
) -> Vec<Inst> {
let mut loop_values: HashSet<Value> = HashSet::new();
let mut invariant_inst: Vec<Inst> = Vec::new();
let mut invariant_insts: Vec<Inst> = Vec::new();
let mut pos = FuncCursor::new(func);
// We traverse the loop EBB in reverse post-order.
for ebb in postorder_ebbs_loop(loop_analysis, cfg, lp).iter().rev() {
@@ -150,15 +172,12 @@ fn remove_loop_invariant_instructions(
loop_values.insert(*val);
}
pos.goto_top(*ebb);
while let Some(inst) = pos.next_inst() {
if pos.func.dfg.has_results(inst) &&
pos.func.dfg.inst_args(inst).into_iter().all(|arg| {
!loop_values.contains(arg)
})
{
#[cfg_attr(feature = "cargo-clippy", allow(block_in_if_condition_stmt))]
'next_inst: while let Some(inst) = pos.next_inst() {
if is_loop_invariant(inst, &pos.func.dfg, &loop_values) {
// If all the instruction's arguments are defined outside the loop,
// then this instruction is loop-invariant
invariant_inst.push(inst);
invariant_insts.push(inst);
// We remove it from the loop
pos.remove_inst_and_step_back();
} else {
@@ -170,7 +189,7 @@ fn remove_loop_invariant_instructions(
}
}
}
invariant_inst
invariant_insts
}
/// Return ebbs from a loop in post-order, starting from an entry point in the block.

View File

@@ -2,13 +2,13 @@
//! and parent in the loop tree.
use dominator_tree::DominatorTree;
use entity::{PrimaryMap, Keys};
use entity::EntityMap;
use entity::{Keys, PrimaryMap};
use flowgraph::ControlFlowGraph;
use ir::{Function, Ebb, Layout};
use ir::{Ebb, Function, Layout};
use packed_option::PackedOption;
use timing;
use std::vec::Vec;
use timing;
/// A opaque reference to a code loop.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
@@ -218,7 +218,6 @@ impl LoopAnalysis {
}
}
}
}
}
}
@@ -227,10 +226,10 @@ impl LoopAnalysis {
mod test {
use cursor::{Cursor, FuncCursor};
use ir::{Function, InstBuilder, types};
use loop_analysis::{Loop, LoopAnalysis};
use flowgraph::ControlFlowGraph;
use dominator_tree::DominatorTree;
use flowgraph::ControlFlowGraph;
use ir::{types, Function, InstBuilder};
use loop_analysis::{Loop, LoopAnalysis};
use std::vec::Vec;
#[test]
@@ -257,7 +256,6 @@ mod test {
cur.insert_ebb(ebb3);
cur.ins().brnz(cond, ebb0, &[]);
}
let mut loop_analysis = LoopAnalysis::new();
@@ -317,7 +315,6 @@ mod test {
cur.insert_ebb(ebb5);
cur.ins().brnz(cond, ebb0, &[]);
}
let mut loop_analysis = LoopAnalysis::new();

View File

@@ -6,7 +6,7 @@
/// The order of elements is not preserved, unless the slice is already partitioned.
///
/// Returns the number of elements where `p(t)` is true.
pub fn partition_slice<'a, T: 'a, F>(s: &'a mut [T], mut p: F) -> usize
pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize
where
F: FnMut(&T) -> bool,
{

Some files were not shown because too many files have changed in this diff Show More