Merge remote-tracking branch 'origin/master' into no_std

2018-03-30 15:14:30 -07:00
parent fc7b0a7e51 b523b69c16
commit 07693048f0
260 changed files with 4509 additions and 5074 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,10 @@ language: rust
 rust:
    - stable
    - beta
+    - nightly
+matrix:
+    allow_failures:
+        - rust: nightly
 dist: trusty
 sudo: false
 addons:
--- a/README.rst
+++ b/README.rst
@@ -3,7 +3,7 @@ Cretonne Code Generator
 =======================

 Cretonne is a low-level retargetable code generator. It translates a `target-independent
-intermediate language <https://cretonne.readthedocs.io/en/latest/langref.html>`_ into executable
+intermediate representation <https://cretonne.readthedocs.io/en/latest/langref.html>`_ into executable
 machine code.

 *This is a work in progress that is not yet functional.*
@@ -16,6 +16,10 @@ machine code.
    :target: https://travis-ci.org/Cretonne/cretonne
    :alt: Build Status

+.. image:: https://badges.gitter.im/Cretonne/cretonne.png
+    :target: https://gitter.im/Cretonne/Lobby/~chat
+    :alt: Gitter chat
+
 For more information, see `the documentation
 <https://cretonne.readthedocs.io/en/latest/?badge=latest>`_.

--- a/check-clippy.sh
+++ b/check-clippy.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -euo pipefail
+
+# Usage: check-clippy.sh  (takes no arguments; exits 0 if cargo lists an installed clippy v0.x, else 1)
+
+if cargo install --list | tee /dev/null | grep -q "^clippy v0"; then
+    exit 0
+else
+    exit 1
+fi
--- a/check-rustfmt.sh
+++ b/check-rustfmt.sh
@@ -22,11 +22,11 @@ set -euo pipefail
 # operation, however that doesn't appear to be possible through "cargo fmt").
 VERS="0.9.0"

-if cargo install --list | grep -q "^rustfmt v$VERS"; then
+if cargo install --list | tee /dev/null | grep -q "^rustfmt v$VERS"; then
    exit 0
 fi

-if [ "$1" != "--install" ]; then
+if [[ ${1:-""} != "--install" ]]; then
    echo "********************************************************************"
    echo "*  Please install rustfmt v$VERS to verify formatting.             *"
    echo "*  If a newer version of rustfmt is available, update this script. *"
--- a/clippy-all.sh
+++ b/clippy-all.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -euo pipefail
+
+# Check all sources with clippy.
+# In the cton-util crate (root dir) clippy will only work with nightly cargo -
+# there is a bug where it will reject the commands passed to it by cargo 0.25.0
+cargo +nightly clippy --all
--- a/cranelift/Cargo.toml
+++ b/cranelift/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "cretonne-tools"
 authors = ["The Cretonne Project Developers"]
-version = "0.3.4"
+version = "0.4.1"
 description = "Binaries for testing the Cretonne library"
 license = "Apache-2.0"
 documentation = "https://cretonne.readthedocs.io/"
@@ -13,18 +13,18 @@ name = "cton-util"
 path = "src/cton-util.rs"

 [dependencies]
-cretonne = { path = "lib/cretonne", version = "0.3.4" }
-cretonne-reader = { path = "lib/reader", version = "0.3.4" }
-cretonne-frontend = { path = "lib/frontend", version = "0.3.4" }
-cretonne-wasm = { path = "lib/wasm", version = "0.3.4" }
-cretonne-native = { path = "lib/native", version = "0.3.4" }
-filecheck = { path = "lib/filecheck" }
+cretonne = { path = "lib/cretonne", version = "0.4.1" }
+cretonne-reader = { path = "lib/reader", version = "0.4.1" }
+cretonne-frontend = { path = "lib/frontend", version = "0.4.1" }
+cretonne-wasm = { path = "lib/wasm", version = "0.4.1" }
+cretonne-native = { path = "lib/native", version = "0.4.1" }
+cretonne-filetests = { path = "lib/filetests", version = "0.4.1" }
+filecheck = "0.2.1"
 docopt = "0.8.0"
 serde = "1.0.8"
 serde_derive = "1.0.8"
-num_cpus = "1.5.1"
-tempdir="0.3.5"
-term = "0.5"
+tempdir = "0.3.5"
+term = "0.5.1"

 [workspace]

--- a/cranelift/clippy.toml
+++ b/cranelift/clippy.toml
@@ -0,0 +1 @@
+doc-valid-idents = [ "WebAssembly", "NaN", "SetCC" ]
--- a/cranelift/docs/callex.cton
+++ b/cranelift/docs/callex.cton
@@ -1,6 +1,6 @@
 test verifier

-function %gcd(i32 uext, i32 uext) -> i32 uext native {
+function %gcd(i32 uext, i32 uext) -> i32 uext system_v {
    fn1 = function %divmod(i32 uext, i32 uext) -> i32 uext, i32 uext

 ebb1(v1: i32, v2: i32):
--- a/cranelift/docs/compare-llvm.rst
+++ b/cranelift/docs/compare-llvm.rst
@@ -16,8 +16,8 @@ highlighting some of the differences and similarities. Both projects:
 - Use an ISA-agnostic input language in order to mostly abstract away the
  differences between target instruction set architectures.
 - Depend extensively on SSA form.
-- Have both textual and in-memory forms of their primary intermediate language.
-  (LLVM also has a binary bitcode format; Cretonne doesn't.)
+- Have both textual and in-memory forms of their primary intermediate
+  representation. (LLVM also has a binary bitcode format; Cretonne doesn't.)
 - Can target multiple ISAs.
 - Can cross-compile by default without rebuilding the code generator.

@@ -41,8 +41,8 @@ LLVM uses multiple intermediate representations as it translates a program to
 binary machine code:

 `LLVM IR <https://llvm.org/docs/LangRef.html>`_
-    This is the primary intermediate language which has textual, binary, and
-    in-memory representations. It serves two main purposes:
+    This is the primary intermediate representation which has textual, binary, and
+    in-memory forms. It serves two main purposes:

    - An ISA-agnostic, stable(ish) input language that front ends can generate
      easily.
@@ -89,9 +89,9 @@ representation. Some target ISAs have a fast instruction selector that can
 translate simple code directly to MachineInstrs, bypassing SelectionDAG when
 possible.

-:doc:`Cretonne <langref>` uses a single intermediate language to cover these
-levels of abstraction. This is possible in part because of Cretonne's smaller
-scope.
+:doc:`Cretonne <langref>` uses a single intermediate representation to cover
+these levels of abstraction. This is possible in part because of Cretonne's
+smaller scope.

 - Cretonne does not provide assemblers and disassemblers, so it is not
  necessary to be able to represent every weird instruction in an ISA. Only
@@ -102,7 +102,7 @@ scope.
 - SSA form is preserved throughout. After register allocation, each SSA value
  is annotated with an assigned ISA register or stack slot.

-The Cretonne intermediate language is similar to LLVM IR, but at a slightly
+The Cretonne intermediate representation is similar to LLVM IR, but at a slightly
 lower level of abstraction.

 Program structure
@@ -112,12 +112,12 @@ In LLVM IR, the largest representable unit is the *module* which corresponds
 more or less to a C translation unit. It is a collection of functions and
 global variables that may contain references to external symbols too.

-In Cretonne IL, the largest representable unit is the *function*. This is so
+In Cretonne IR, the largest representable unit is the *function*. This is so
 that functions can easily be compiled in parallel without worrying about
 references to shared data structures. Cretonne does not have any
 inter-procedural optimizations like inlining.

-An LLVM IR function is a graph of *basic blocks*. A Cretonne IL function is a
+An LLVM IR function is a graph of *basic blocks*. A Cretonne IR function is a
 graph of *extended basic blocks* that may contain internal branch instructions.
 The main difference is that an LLVM conditional branch instruction has two
 target basic blocks---a true and a false edge. A Cretonne branch instruction
--- a/cranelift/docs/cton_domain.py
+++ b/cranelift/docs/cton_domain.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Sphinx domain for documenting compiler intermediate languages.
+# Sphinx domain for documenting compiler intermediate representations.
 #
 # This defines a 'cton' Sphinx domain with the following directives and roles:
 #
@@ -29,10 +29,10 @@ import sphinx.ext.autodoc

 class CtonObject(ObjectDescription):
    """
-    Any kind of Cretonne IL object.
+    Any kind of Cretonne IR object.

    This is a shared base class for the different kinds of indexable objects
-    in the Cretonne IL reference.
+    in the Cretonne IR reference.
    """
    option_spec = {
        'noindex': directives.flag,
@@ -98,7 +98,7 @@ def parse_type(name, signode):


 class CtonType(CtonObject):
-    """A Cretonne IL type description."""
+    """A Cretonne IR type description."""

    def handle_signature(self, sig, signode):
        """
@@ -112,7 +112,7 @@ class CtonType(CtonObject):
        return name

    def get_index_text(self, name):
-        return name + ' (IL type)'
+        return name + ' (IR type)'


 sep_equal = re.compile('\s*=\s*')
@@ -127,7 +127,7 @@ def parse_params(s, signode):


 class CtonInst(CtonObject):
-    """A Cretonne IL instruction."""
+    """A Cretonne IR instruction."""

    doc_field_types = [
        TypedField('argument', label=l_('Arguments'),
@@ -176,11 +176,11 @@ class CtonInst(CtonObject):


 class CtonInstGroup(CtonObject):
-    """A Cretonne IL instruction group."""
+    """A Cretonne IR instruction group."""


 class CretonneDomain(Domain):
-    """Cretonne domain for intermediate language objects."""
+    """Cretonne domain for IR objects."""
    name = 'cton'
    label = 'Cretonne'

--- a/cranelift/docs/example.cton
+++ b/cranelift/docs/example.cton
@@ -1,6 +1,6 @@
 test verifier

-function %average(i32, i32) -> f32 native {
+function %average(i32, i32) -> f32 system_v {
    ss1 = explicit_slot 8         ; Stack slot for ``sum``.

 ebb1(v1: i32, v2: i32):
--- a/cranelift/docs/langref.rst
+++ b/cranelift/docs/langref.rst
@@ -5,19 +5,19 @@ Cretonne Language Reference
 .. default-domain:: cton
 .. highlight:: cton

-The Cretonne intermediate language (:term:`IL`) has two equivalent
-representations: an *in-memory data structure* that the code generator library
-is using, and a *text format* which is used for test cases and debug output.
-Files containing Cretonne textual IL have the ``.cton`` filename extension.
+The Cretonne intermediate representation (:term:`IR`) has two primary forms:
+an *in-memory data structure* that the code generator library is using, and a
+*text format* which is used for test cases and debug output.
+Files containing Cretonne textual IR have the ``.cton`` filename extension.

-This reference uses the text format to describe IL semantics but glosses over
+This reference uses the text format to describe IR semantics but glosses over
 the finer details of the lexical and syntactic structure of the format.


 Overall structure
 =================

-Cretonne compiles functions independently. A ``.cton`` IL file may contain
+Cretonne compiles functions independently. A ``.cton`` IR file may contain
 multiple functions, and the programmatic API can create multiple function
 handles at the same time, but the functions don't share any data or reference
 each other directly.
@@ -27,7 +27,7 @@ This is a simple C function that computes the average of an array of floats:
 .. literalinclude:: example.c
    :language: c

-Here is the same function compiled into Cretonne IL:
+Here is the same function compiled into Cretonne IR:

 .. literalinclude:: example.cton
    :language: cton
@@ -77,7 +77,7 @@ variable value for the next iteration.

 The `cton_frontend` crate contains utilities for translating from programs
 containing multiple assignments to the same variables into SSA form for
-Cretonne :term:`IL`.
+Cretonne :term:`IR`.

 Such variables can also be presented to Cretonne as :term:`stack slot`\s.
 Stack slots are accessed with the :inst:`stack_store` and :inst:`stack_load`
@@ -303,7 +303,7 @@ indicate the different kinds of immediate operands on an instruction.
    A floating point condition code. See the :inst:`fcmp` instruction for details.

 The two IEEE floating point immediate types :type:`ieee32` and :type:`ieee64`
-are displayed as hexadecimal floating point literals in the textual :term:`IL`
+are displayed as hexadecimal floating point literals in the textual :term:`IR`
 format. Decimal floating point literals are not allowed because some computer
 systems can round differently when converting to binary. The hexadecimal
 floating point format is mostly the same as the one used by C99, but extended
@@ -400,11 +400,11 @@ convention:
    param        : type [paramext] [paramspecial]
    paramext     : "uext" | "sext"
    paramspecial : "sret" | "link" | "fp" | "csr" | "vmctx"
-    callconv     : "native" | "spiderwasm"
+    callconv     : "system_v" | "spiderwasm"

 Parameters and return values have flags whose meaning is mostly target
-dependent. They make it possible to call native functions on the target
-platform. When calling other Cretonne functions, the flags are not necessary.
+dependent. These flags support interfacing with code produced by other
+compilers.

 Functions that are called directly must be declared in the :term:`function
 preamble`:
@@ -563,7 +563,7 @@ runtime data structures.
    alignment for storing a pointer.

    Chains of ``deref`` global variables are possible, but cycles are not
-    allowed. They will be caught by the IL verifier.
+    allowed. They will be caught by the IR verifier.

    :arg BaseGV: Global variable containing the base pointer.
    :arg Offset: Byte offset from the loaded base pointer to the global
@@ -654,6 +654,11 @@ trap when accessed.
            address space reserved for the heap, not including the guard pages.
    :arg GuardBytes: Size of the guard pages in bytes.

+When the base is a global variable, it must be :term:`accessible` and naturally
+aligned for a pointer value.
+
+The ``reserved_reg`` option is not yet implemented.
+
 Dynamic heaps
 ~~~~~~~~~~~~~

@@ -672,6 +677,11 @@ is resized. The bound of a dynamic heap is stored in a global variable.
    :arg BoundGV: Global variable containing the current heap bound in bytes.
    :arg GuardBytes: Size of the guard pages in bytes.

+When the base is a global variable, it must be :term:`accessible` and naturally
+aligned for a pointer value.
+
+The ``reserved_reg`` option is not yet implemented.
+
 Heap examples
 ~~~~~~~~~~~~~

@@ -1144,19 +1154,11 @@ Glossary
        The extended basic blocks which contain all the executable code in a
        function. The function body follows the function preamble.

-    intermediate language
-    IL
-        The language used to describe functions to Cretonne. This reference
-        describes the syntax and semantics of the Cretonne IL. The IL has two
-        forms: Textual and an in-memory intermediate representation
-        (:term:`IR`).
-
    intermediate representation
    IR
-        The in-memory representation of :term:`IL`. The data structures
-        Cretonne uses to represent a program internally are called the
-        intermediate representation. Cretonne's IR can be converted to text
-        losslessly.
+        The language used to describe functions to Cretonne. This reference
+        describes the syntax and semantics of Cretonne IR. The IR has two
+        forms: Textual, and an in-memory data structure.

    stack slot
        A fixed size memory allocation in the current function's activation
--- a/cranelift/docs/testing.rst
+++ b/cranelift/docs/testing.rst
@@ -89,7 +89,7 @@ easier to provide substantial input functions for the compiler tests.

 File tests are :file:`*.cton` files in the :file:`filetests/` directory
 hierarchy. Each file has a header describing what to test followed by a number
-of input functions in the :doc:`Cretonne textual intermediate language
+of input functions in the :doc:`Cretonne textual intermediate representation
 <langref>`:

 .. productionlist::
@@ -136,13 +136,15 @@ This example will run the legalizer test twice. Both runs will have
 ``opt_level=best``, but they will have different ``is_64bit`` settings. The 32-bit
 run will also have the RISC-V specific flag ``supports_m`` disabled.

+The filetests are run automatically as part of ``cargo test``, and they can
+also be run manually with the ``cton-util test`` command.
+
 Filecheck
 ---------

 Many of the test commands described below use *filecheck* to verify their
 output. Filecheck is a Rust implementation of the LLVM tool of the same name.
-See the :file:`lib/filecheck` `documentation <https://docs.rs/filecheck/>`_ for
-details of its syntax.
+See the `documentation <https://docs.rs/filecheck/>`_ for details of its syntax.

 Comments in :file:`.cton` files are associated with the entity they follow.
 This typically means an instruction or the whole function. Those tests that
@@ -164,7 +166,7 @@ Cretonne's tests don't need this.
 ----------

 This is one of the simplest file tests, used for testing the conversion to and
-from textual IL. The ``test cat`` command simply parses each function and
+from textual IR. The ``test cat`` command simply parses each function and
 converts it back to text again. The text of each function is then matched
 against the associated filecheck directives.

@@ -186,7 +188,7 @@ Example::
 `test verifier`
 ---------------

-Run each function through the IL verifier and check that it produces the
+Run each function through the IR verifier and check that it produces the
 expected error messages.

 Expected error messages are indicated with an ``error:`` directive *on the
@@ -324,6 +326,38 @@ Test the simple GVN pass.
 The simple GVN pass is run on each function, and then results are run
 through filecheck.

+`test licm`
+-----------
+
+Test the LICM pass.
+
+The LICM pass is run on each function, and then results are run
+through filecheck.
+
+`test dce`
+----------
+
+Test the DCE pass.
+
+The DCE pass is run on each function, and then results are run
+through filecheck.
+
+`test preopt`
+-------------
+
+Test the preopt pass.
+
+The preopt pass is run on each function, and then results are run
+through filecheck.
+
+`test postopt`
+--------------
+
+Test the postopt pass.
+
+The postopt pass is run on each function, and then results are run
+through filecheck.
+
 `test compile`
 --------------

@@ -333,4 +367,4 @@ Each function is passed through the full ``Context::compile()`` function
 which is normally used to compile code. This type of test often depends
 on assertions or verifier errors, but it is also possible to use
 filecheck directives which will be matched against the final form of the
-Cretonne IL right before binary machine code emission.
+Cretonne IR right before binary machine code emission.
--- a/cranelift/filetests/dce/basic.cton
+++ b/cranelift/filetests/dce/basic.cton
@@ -0,0 +1,46 @@
+test dce
+
+function %simple() -> i32 {
+ebb0:
+    v2 = iconst.i32 2
+    v3 = iconst.i32 3
+    return v3
+}
+; sameln: function %simple
+; nextln: ebb0:
+; nextln:     v3 = iconst.i32 3
+; nextln:     return v3
+; nextln: }
+
+function %some_branching(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v3 = iconst.i32 70
+    v4 = iconst.i32 71
+    v5 = iconst.i32 72
+    v8 = iconst.i32 73
+    brz v0, ebb1
+    jump ebb2(v8)
+
+ebb1:
+    v2 = iadd v0, v3
+    return v0
+
+ebb2(v9: i32):
+    v6 = iadd v1, v4
+    v7 = iadd v6, v9
+    return v7
+}
+; sameln: function %some_branching
+; nextln: ebb0(v0: i32, v1: i32):
+; nextln:     v4 = iconst.i32 71
+; nextln:     v8 = iconst.i32 73
+; nextln:     brz v0, ebb1
+; nextln:     jump ebb2(v8)
+; nextln: 
+; nextln: ebb1:
+; nextln:     return v0
+; nextln: 
+; nextln: ebb2(v9: i32):
+; nextln:     v6 = iadd.i32 v1, v4
+; nextln:     v7 = iadd v6, v9
+; nextln:     return v7
--- a/cranelift/filetests/domtree/loops.cton
+++ b/cranelift/filetests/domtree/loops.cton
@@ -59,7 +59,7 @@ function %test(i32) {
 ; nextln: ebb5:
 ; nextln: }

-function %loop2(i32) native {
+function %loop2(i32) system_v {
    ebb0(v0: i32):
        brz v0, ebb1    ; dominates: ebb1 ebb3 ebb4 ebb5
        jump ebb2       ; dominates: ebb2
--- a/cranelift/filetests/domtree/loops2.cton
+++ b/cranelift/filetests/domtree/loops2.cton
@@ -43,7 +43,7 @@ function %loop1(i32) {
 ; nextln: ebb9:
 ; nextln: }

-function %loop2(i32) native {
+function %loop2(i32) system_v {
    ebb0(v0: i32):
        brz v0, ebb1    ; dominates: ebb1 ebb3 ebb4 ebb5
        jump ebb2       ; dominates: ebb2
--- a/cranelift/filetests/isa/intel/abi-bool.cton
+++ b/cranelift/filetests/isa/intel/abi-bool.cton
@@ -2,7 +2,7 @@ test compile
 set is_64bit=1
 isa intel haswell

-function %foo(i64, i64, i64, i32) -> b1 native {
+function %foo(i64, i64, i64, i32) -> b1 system_v {
 ebb3(v0: i64, v1: i64, v2: i64, v3: i32):
    v5 = icmp ne v2, v2
    v8 = iconst.i64 0
--- a/cranelift/filetests/isa/intel/abi32.cton
+++ b/cranelift/filetests/isa/intel/abi32.cton
@@ -5,14 +5,14 @@ isa intel
 ; regex: V=v\d+

 function %f() {
-    sig0 = (i32) -> i32 native
-    ; check: sig0 = (i32 [0]) -> i32 [%rax] native
+    sig0 = (i32) -> i32 system_v
+    ; check: sig0 = (i32 [0]) -> i32 [%rax] system_v

-    sig1 = (i64) -> b1 native
-    ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] native
+    sig1 = (i64) -> b1 system_v
+    ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] system_v

-    sig2 = (f32, i64) -> f64 native
-    ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] native
+    sig2 = (f32, i64) -> f64 system_v
+    ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] system_v

 ebb0:
    return
--- a/cranelift/filetests/isa/intel/abi64.cton
+++ b/cranelift/filetests/isa/intel/abi64.cton
@@ -6,14 +6,14 @@ isa intel
 ; regex: V=v\d+

 function %f() {
-    sig0 = (i32) -> i32 native
-    ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] native
+    sig0 = (i32) -> i32 system_v
+    ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] system_v

-    sig1 = (i64) -> b1 native
-    ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] native
+    sig1 = (i64) -> b1 system_v
+    ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] system_v

-    sig2 = (f32, i64) -> f64 native
-    ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] native
+    sig2 = (f32, i64) -> f64 system_v
+    ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v

 ebb0:
    return
--- a/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton
+++ b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton
@@ -47,28 +47,23 @@ ebb1(v20: i32):
 function %i64_popcount(i64) -> i64 {
 ebb0(v30: i64):
  v31 = popcnt v30;
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: iconst.i64
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: iadd
  ; check: iconst.i64
  ; check: band
  ; check: iconst.i64
  ; check: imul
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  return v31;
 }

@@ -78,27 +73,22 @@ ebb0(v30: i64):
 function %i32_popcount(i32) -> i32 {
 ebb0(v40: i32):
  v41 = popcnt v40;
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: iconst.i32
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: iadd
  ; check: iconst.i32
  ; check: band
  ; check: iconst.i32
  ; check: imul
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  return v41;
 }
--- a/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton
+++ b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton
@@ -15,56 +15,56 @@ ebb0:

    [-,%r11]                 v10 = iconst.i64 0x1234
    ; asm: bsfq %r11, %rcx
-    [-,%rcx,%eflags]         v11, v12 = x86_bsf v10    ; bin: 49 0f bc cb
+    [-,%rcx,%rflags]         v11, v12 = x86_bsf v10    ; bin: 49 0f bc cb

    [-,%rdx]                 v14 = iconst.i64 0x5678
    ; asm: bsfq %rdx, %r12
-    [-,%r12,%eflags]         v15, v16 = x86_bsf v14    ; bin: 4c 0f bc e2
+    [-,%r12,%rflags]         v15, v16 = x86_bsf v14    ; bin: 4c 0f bc e2

    ; asm: bsfq %rdx, %rdi
-    [-,%rdi,%eflags]         v17, v18 = x86_bsf v14    ; bin: 48 0f bc fa
+    [-,%rdi,%rflags]         v17, v18 = x86_bsf v14    ; bin: 48 0f bc fa


    ; 32-bit wide bsf

    [-,%r11]                 v20 = iconst.i32 0x1234
    ; asm: bsfl %r11d, %ecx
-    [-,%rcx,%eflags]         v21, v22 = x86_bsf v20    ; bin: 41 0f bc cb
+    [-,%rcx,%rflags]         v21, v22 = x86_bsf v20    ; bin: 41 0f bc cb

    [-,%rdx]                 v24 = iconst.i32 0x5678
    ; asm: bsfl %edx, %r12d
-    [-,%r12,%eflags]         v25, v26 = x86_bsf v24    ; bin: 44 0f bc e2
+    [-,%r12,%rflags]         v25, v26 = x86_bsf v24    ; bin: 44 0f bc e2

    ; asm: bsfl %edx, %esi
-    [-,%rsi,%eflags]         v27, v28 = x86_bsf v24    ; bin: 0f bc f2
+    [-,%rsi,%rflags]         v27, v28 = x86_bsf v24    ; bin: 0f bc f2


    ; 64-bit wide bsr

    [-,%r11]                 v30 = iconst.i64 0x1234
    ; asm: bsrq %r11, %rcx
-    [-,%rcx,%eflags]         v31, v32 = x86_bsr v30    ; bin: 49 0f bd cb
+    [-,%rcx,%rflags]         v31, v32 = x86_bsr v30    ; bin: 49 0f bd cb

    [-,%rdx]                 v34 = iconst.i64 0x5678
    ; asm: bsrq %rdx, %r12
-    [-,%r12,%eflags]         v35, v36 = x86_bsr v34    ; bin: 4c 0f bd e2
+    [-,%r12,%rflags]         v35, v36 = x86_bsr v34    ; bin: 4c 0f bd e2

    ; asm: bsrq %rdx, %rdi
-    [-,%rdi,%eflags]         v37, v38 = x86_bsr v34    ; bin: 48 0f bd fa
+    [-,%rdi,%rflags]         v37, v38 = x86_bsr v34    ; bin: 48 0f bd fa


    ; 32-bit wide bsr

    [-,%r11]                 v40 = iconst.i32 0x1234
    ; asm: bsrl %r11d, %ecx
-    [-,%rcx,%eflags]         v41, v42 = x86_bsr v40    ; bin: 41 0f bd cb
+    [-,%rcx,%rflags]         v41, v42 = x86_bsr v40    ; bin: 41 0f bd cb

    [-,%rdx]                 v44 = iconst.i32 0x5678
    ; asm: bsrl %edx, %r12d
-    [-,%r12,%eflags]         v45, v46 = x86_bsr v44    ; bin: 44 0f bd e2
+    [-,%r12,%rflags]         v45, v46 = x86_bsr v44    ; bin: 44 0f bd e2

    ; asm: bsrl %edx, %esi
-    [-,%rsi,%eflags]         v47, v48 = x86_bsr v44    ; bin: 0f bd f2
+    [-,%rsi,%rflags]         v47, v48 = x86_bsr v44    ; bin: 0f bd f2


    ; 64-bit wide cmov
--- a/cranelift/filetests/isa/intel/binary32-float.cton
+++ b/cranelift/filetests/isa/intel/binary32-float.cton
@@ -147,48 +147,48 @@ ebb0:

    ; Load/Store

-    ; asm: movd (%ecx), %xmm5
-    [-,%xmm5]           v100 = load.f32 v0                      ; bin: 66 0f 6e 29
-    ; asm: movd (%esi), %xmm2
-    [-,%xmm2]           v101 = load.f32 v1                      ; bin: 66 0f 6e 16
-    ; asm: movd 50(%ecx), %xmm5
-    [-,%xmm5]           v110 = load.f32 v0+50                   ; bin: 66 0f 6e 69 32
-    ; asm: movd -50(%esi), %xmm2
-    [-,%xmm2]           v111 = load.f32 v1-50                   ; bin: 66 0f 6e 56 ce
-    ; asm: movd 10000(%ecx), %xmm5
-    [-,%xmm5]           v120 = load.f32 v0+10000                ; bin: 66 0f 6e a9 00002710
-    ; asm: movd -10000(%esi), %xmm2
-    [-,%xmm2]           v121 = load.f32 v1-10000                ; bin: 66 0f 6e 96 ffffd8f0
+    ; asm: movss (%ecx), %xmm5
+    [-,%xmm5]           v100 = load.f32 v0                      ; bin: heap_oob f3 0f 10 29
+    ; asm: movss (%esi), %xmm2
+    [-,%xmm2]           v101 = load.f32 v1                      ; bin: heap_oob f3 0f 10 16
+    ; asm: movss 50(%ecx), %xmm5
+    [-,%xmm5]           v110 = load.f32 v0+50                   ; bin: heap_oob f3 0f 10 69 32
+    ; asm: movss -50(%esi), %xmm2
+    [-,%xmm2]           v111 = load.f32 v1-50                   ; bin: heap_oob f3 0f 10 56 ce
+    ; asm: movss 10000(%ecx), %xmm5
+    [-,%xmm5]           v120 = load.f32 v0+10000                ; bin: heap_oob f3 0f 10 a9 00002710
+    ; asm: movss -10000(%esi), %xmm2
+    [-,%xmm2]           v121 = load.f32 v1-10000                ; bin: heap_oob f3 0f 10 96 ffffd8f0

-    ; asm: movd %xmm5, (%ecx)
-    [-]                 store.f32 v100, v0                      ; bin: 66 0f 7e 29
-    ; asm: movd %xmm2, (%esi)
-    [-]                 store.f32 v101, v1                      ; bin: 66 0f 7e 16
-    ; asm: movd %xmm5, 50(%ecx)
-    [-]                 store.f32 v100, v0+50                   ; bin: 66 0f 7e 69 32
-    ; asm: movd %xmm2, -50(%esi)
-    [-]                 store.f32 v101, v1-50                   ; bin: 66 0f 7e 56 ce
-    ; asm: movd %xmm5, 10000(%ecx)
-    [-]                 store.f32 v100, v0+10000                ; bin: 66 0f 7e a9 00002710
-    ; asm: movd %xmm2, -10000(%esi)
-    [-]                 store.f32 v101, v1-10000                ; bin: 66 0f 7e 96 ffffd8f0
+    ; asm: movss %xmm5, (%ecx)
+    [-]                 store.f32 v100, v0                      ; bin: heap_oob f3 0f 11 29
+    ; asm: movss %xmm2, (%esi)
+    [-]                 store.f32 v101, v1                      ; bin: heap_oob f3 0f 11 16
+    ; asm: movss %xmm5, 50(%ecx)
+    [-]                 store.f32 v100, v0+50                   ; bin: heap_oob f3 0f 11 69 32
+    ; asm: movss %xmm2, -50(%esi)
+    [-]                 store.f32 v101, v1-50                   ; bin: heap_oob f3 0f 11 56 ce
+    ; asm: movss %xmm5, 10000(%ecx)
+    [-]                 store.f32 v100, v0+10000                ; bin: heap_oob f3 0f 11 a9 00002710
+    ; asm: movss %xmm2, -10000(%esi)
+    [-]                 store.f32 v101, v1-10000                ; bin: heap_oob f3 0f 11 96 ffffd8f0

    ; Spill / Fill.

-    ; asm: movd %xmm5, 1032(%esp)
-    [-,ss1]             v200 = spill v100                       ; bin: 66 0f 7e ac 24 00000408
-    ; asm: movd %xmm2, 1032(%esp)
-    [-,ss1]             v201 = spill v101                       ; bin: 66 0f 7e 94 24 00000408
+    ; asm: movss %xmm5, 1032(%esp)
+    [-,ss1]             v200 = spill v100                       ; bin: f3 0f 11 ac 24 00000408
+    ; asm: movss %xmm2, 1032(%esp)
+    [-,ss1]             v201 = spill v101                       ; bin: f3 0f 11 94 24 00000408

-    ; asm: movd 1032(%esp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: 66 0f 6e ac 24 00000408
-    ; asm: movd 1032(%esp), %xmm2
-    [-,%xmm2]           v211 = fill v201                        ; bin: 66 0f 6e 94 24 00000408
+    ; asm: movss 1032(%esp), %xmm5
+    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 10 ac 24 00000408
+    ; asm: movss 1032(%esp), %xmm2
+    [-,%xmm2]           v211 = fill v201                        ; bin: f3 0f 10 94 24 00000408

-    ; asm: movd %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: 66 0f 7e ac 24 00000408
-    ; asm: movd 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: 66 0f 6e ac 24 00000408
+    ; asm: movss %xmm5, 1032(%esp)
+    regspill v100, %xmm5 -> ss1                                 ; bin: f3 0f 11 ac 24 00000408
+    ; asm: movss 1032(%esp), %xmm5
+    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 10 ac 24 00000408

    ; Comparisons.
    ;
@@ -221,11 +221,11 @@ ebb0:
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 0f 2e d5 0f 96 c2

    ; asm: ucomiss %xmm2, %xmm5
-    [-,%eflags]         v310 = ffcmp v10, v11                   ; bin: 0f 2e ea
+    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 0f 2e ea
    ; asm: ucomiss %xmm2, %xmm5
-    [-,%eflags]         v311 = ffcmp v11, v10                   ; bin: 0f 2e d5
+    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 0f 2e d5
    ; asm: ucomiss %xmm5, %xmm5
-    [-,%eflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed
+    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed

    return
 }
@@ -362,48 +362,48 @@ ebb0:

    ; Load/Store

-    ; asm: movq (%ecx), %xmm5
-    [-,%xmm5]           v100 = load.f64 v0                      ; bin: f3 0f 7e 29
-    ; asm: movq (%esi), %xmm2
-    [-,%xmm2]           v101 = load.f64 v1                      ; bin: f3 0f 7e 16
-    ; asm: movq 50(%ecx), %xmm5
-    [-,%xmm5]           v110 = load.f64 v0+50                   ; bin: f3 0f 7e 69 32
-    ; asm: movq -50(%esi), %xmm2
-    [-,%xmm2]           v111 = load.f64 v1-50                   ; bin: f3 0f 7e 56 ce
-    ; asm: movq 10000(%ecx), %xmm5
-    [-,%xmm5]           v120 = load.f64 v0+10000                ; bin: f3 0f 7e a9 00002710
-    ; asm: movq -10000(%esi), %xmm2
-    [-,%xmm2]           v121 = load.f64 v1-10000                ; bin: f3 0f 7e 96 ffffd8f0
+    ; asm: movsd (%ecx), %xmm5
+    [-,%xmm5]           v100 = load.f64 v0                      ; bin: heap_oob f2 0f 10 29
+    ; asm: movsd (%esi), %xmm2
+    [-,%xmm2]           v101 = load.f64 v1                      ; bin: heap_oob f2 0f 10 16
+    ; asm: movsd 50(%ecx), %xmm5
+    [-,%xmm5]           v110 = load.f64 v0+50                   ; bin: heap_oob f2 0f 10 69 32
+    ; asm: movsd -50(%esi), %xmm2
+    [-,%xmm2]           v111 = load.f64 v1-50                   ; bin: heap_oob f2 0f 10 56 ce
+    ; asm: movsd 10000(%ecx), %xmm5
+    [-,%xmm5]           v120 = load.f64 v0+10000                ; bin: heap_oob f2 0f 10 a9 00002710
+    ; asm: movsd -10000(%esi), %xmm2
+    [-,%xmm2]           v121 = load.f64 v1-10000                ; bin: heap_oob f2 0f 10 96 ffffd8f0

-    ; asm: movq %xmm5, (%ecx)
-    [-]                 store.f64 v100, v0                      ; bin: 66 0f d6 29
-    ; asm: movq %xmm2, (%esi)
-    [-]                 store.f64 v101, v1                      ; bin: 66 0f d6 16
-    ; asm: movq %xmm5, 50(%ecx)
-    [-]                 store.f64 v100, v0+50                   ; bin: 66 0f d6 69 32
-    ; asm: movq %xmm2, -50(%esi)
-    [-]                 store.f64 v101, v1-50                   ; bin: 66 0f d6 56 ce
-    ; asm: movq %xmm5, 10000(%ecx)
-    [-]                 store.f64 v100, v0+10000                ; bin: 66 0f d6 a9 00002710
-    ; asm: movq %xmm2, -10000(%esi)
-    [-]                 store.f64 v101, v1-10000                ; bin: 66 0f d6 96 ffffd8f0
+    ; asm: movsd %xmm5, (%ecx)
+    [-]                 store.f64 v100, v0                      ; bin: heap_oob f2 0f 11 29
+    ; asm: movsd %xmm2, (%esi)
+    [-]                 store.f64 v101, v1                      ; bin: heap_oob f2 0f 11 16
+    ; asm: movsd %xmm5, 50(%ecx)
+    [-]                 store.f64 v100, v0+50                   ; bin: heap_oob f2 0f 11 69 32
+    ; asm: movsd %xmm2, -50(%esi)
+    [-]                 store.f64 v101, v1-50                   ; bin: heap_oob f2 0f 11 56 ce
+    ; asm: movsd %xmm5, 10000(%ecx)
+    [-]                 store.f64 v100, v0+10000                ; bin: heap_oob f2 0f 11 a9 00002710
+    ; asm: movsd %xmm2, -10000(%esi)
+    [-]                 store.f64 v101, v1-10000                ; bin: heap_oob f2 0f 11 96 ffffd8f0

    ; Spill / Fill.

-    ; asm: movq %xmm5, 1032(%esp)
-    [-,ss1]             v200 = spill v100                       ; bin: 66 0f d6 ac 24 00000408
-    ; asm: movq %xmm2, 1032(%esp)
-    [-,ss1]             v201 = spill v101                       ; bin: 66 0f d6 94 24 00000408
+    ; asm: movsd %xmm5, 1032(%esp)
+    [-,ss1]             v200 = spill v100                       ; bin: f2 0f 11 ac 24 00000408
+    ; asm: movsd %xmm2, 1032(%esp)
+    [-,ss1]             v201 = spill v101                       ; bin: f2 0f 11 94 24 00000408

-    ; asm: movq 1032(%esp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 7e ac 24 00000408
-    ; asm: movq 1032(%esp), %xmm2
-    [-,%xmm2]           v211 = fill v201                        ; bin: f3 0f 7e 94 24 00000408
+    ; asm: movsd 1032(%esp), %xmm5
+    [-,%xmm5]           v210 = fill v200                        ; bin: f2 0f 10 ac 24 00000408
+    ; asm: movsd 1032(%esp), %xmm2
+    [-,%xmm2]           v211 = fill v201                        ; bin: f2 0f 10 94 24 00000408

-    ; asm: movq %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: 66 0f d6 ac 24 00000408
-    ; asm: movq 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 7e ac 24 00000408
+    ; asm: movsd %xmm5, 1032(%esp)
+    regspill v100, %xmm5 -> ss1                                 ; bin: f2 0f 11 ac 24 00000408
+    ; asm: movsd 1032(%esp), %xmm5
+    regfill v100, ss1 -> %xmm5                                  ; bin: f2 0f 10 ac 24 00000408

    ; Comparisons.
    ;
@@ -436,11 +436,11 @@ ebb0:
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 66 0f 2e d5 0f 96 c2

    ; asm: ucomisd %xmm2, %xmm5
-    [-,%eflags]         v310 = ffcmp v10, v11                   ; bin: 66 0f 2e ea
+    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 66 0f 2e ea
    ; asm: ucomisd %xmm2, %xmm5
-    [-,%eflags]         v311 = ffcmp v11, v10                   ; bin: 66 0f 2e d5
+    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 66 0f 2e d5
    ; asm: ucomisd %xmm5, %xmm5
-    [-,%eflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed
+    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed

    return
 }
@@ -448,7 +448,7 @@ ebb0:
 function %cpuflags_float(f32 [%xmm0]) {
 ebb0(v0: f32 [%xmm0]):
    ; asm: ucomiss %xmm0, %xmm0
-    [-,%eflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0
+    [-,%rflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0

    jump ebb1

@@ -471,21 +471,21 @@ ebb1:
    brff ule v1, ebb1                                           ; bin: 76 f0

    ; asm: jp .+4; ud2
-    trapff ord v1, user0                                        ; bin: 7a 02 0f 0b
+    trapff ord v1, user0                                        ; bin: 7a 02 user0 0f 0b
    ; asm: jnp .+4; ud2
-    trapff uno v1, user0                                        ; bin: 7b 02 0f 0b
+    trapff uno v1, user0                                        ; bin: 7b 02 user0 0f 0b
    ; asm: je .+4; ud2
-    trapff one v1, user0                                        ; bin: 74 02 0f 0b
+    trapff one v1, user0                                        ; bin: 74 02 user0 0f 0b
    ; asm: jne .+4; ud2
-    trapff ueq v1, user0                                        ; bin: 75 02 0f 0b
+    trapff ueq v1, user0                                        ; bin: 75 02 user0 0f 0b
    ; asm: jna .+4; ud2
-    trapff gt v1, user0                                         ; bin: 76 02 0f 0b
+    trapff gt v1, user0                                         ; bin: 76 02 user0 0f 0b
    ; asm: jnae .+4; ud2
-    trapff ge v1, user0                                         ; bin: 72 02 0f 0b
+    trapff ge v1, user0                                         ; bin: 72 02 user0 0f 0b
    ; asm: jnb .+4; ud2
-    trapff ult v1, user0                                        ; bin: 73 02 0f 0b
+    trapff ult v1, user0                                        ; bin: 73 02 user0 0f 0b
    ; asm: jnbe .+4; ud2
-    trapff ule v1, user0                                        ; bin: 77 02 0f 0b
+    trapff ule v1, user0                                        ; bin: 77 02 user0 0f 0b

    ; asm: setnp %bl
    [-,%rbx]            v10 = trueff ord v1                     ; bin: 0f 9b c3
--- a/cranelift/filetests/isa/intel/binary32.cton
+++ b/cranelift/filetests/isa/intel/binary32.cton
@@ -1,4 +1,4 @@
-; binary emission of 32-bit code.
+; binary emission of x86-32 code.
 test binemit
 set is_compressed
 isa intel haswell
@@ -25,6 +25,9 @@ ebb0:
    ; asm: movl $2, %esi
    [-,%rsi]            v2 = iconst.i32 2        ; bin: be 00000002

+    ; asm: movl $1, %ecx
+    [-,%rcx]            v9007 = bconst.b1 true      ; bin: b9 00000001
+
    ; Integer Register-Register Operations.

    ; asm: addl %esi, %ecx
@@ -125,13 +128,13 @@ ebb0:
    ; asm: movl $2, %edx
    [-,%rdx]      v53 = iconst.i32 2                    ; bin: ba 00000002
    ; asm: idivl %ecx
-    [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1  ; bin: f7 f9
+    [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1  ; bin: int_divz f7 f9
    ; asm: idivl %esi
-    [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2  ; bin: f7 fe
+    [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2  ; bin: int_divz f7 fe
    ; asm: divl %ecx
-    [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1  ; bin: f7 f1
+    [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1  ; bin: int_divz f7 f1
    ; asm: divl %esi
-    [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2  ; bin: f7 f6
+    [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2  ; bin: int_divz f7 f6

    ; Register copies.

@@ -152,105 +155,105 @@ ebb0:
    ; Register indirect addressing with no displacement.

    ; asm: movl %ecx, (%esi)
-    store v1, v2                                ; bin: 89 0e
+    store v1, v2                                ; bin: heap_oob 89 0e
    ; asm: movl %esi, (%ecx)
-    store v2, v1                                ; bin: 89 31
+    store v2, v1                                ; bin: heap_oob 89 31
    ; asm: movw %cx, (%esi)
-    istore16 v1, v2                             ; bin: 66 89 0e
+    istore16 v1, v2                             ; bin: heap_oob 66 89 0e
    ; asm: movw %si, (%ecx)
-    istore16 v2, v1                             ; bin: 66 89 31
+    istore16 v2, v1                             ; bin: heap_oob 66 89 31
    ; asm: movb %cl, (%esi)
-    istore8 v1, v2                              ; bin: 88 0e
+    istore8 v1, v2                              ; bin: heap_oob 88 0e
    ; Can't store %sil in 32-bit mode (needs REX prefix).

    ; asm: movl (%ecx), %edi
-    [-,%rdi]            v100 = load.i32 v1      ; bin: 8b 39
+    [-,%rdi]            v100 = load.i32 v1      ; bin: heap_oob 8b 39
    ; asm: movl (%esi), %edx
-    [-,%rdx]            v101 = load.i32 v2      ; bin: 8b 16
+    [-,%rdx]            v101 = load.i32 v2      ; bin: heap_oob 8b 16
    ; asm: movzwl (%ecx), %edi
-    [-,%rdi]            v102 = uload16.i32 v1   ; bin: 0f b7 39
+    [-,%rdi]            v102 = uload16.i32 v1   ; bin: heap_oob 0f b7 39
    ; asm: movzwl (%esi), %edx
-    [-,%rdx]            v103 = uload16.i32 v2   ; bin: 0f b7 16
+    [-,%rdx]            v103 = uload16.i32 v2   ; bin: heap_oob 0f b7 16
    ; asm: movswl (%ecx), %edi
-    [-,%rdi]            v104 = sload16.i32 v1   ; bin: 0f bf 39
+    [-,%rdi]            v104 = sload16.i32 v1   ; bin: heap_oob 0f bf 39
    ; asm: movswl (%esi), %edx
-    [-,%rdx]            v105 = sload16.i32 v2   ; bin: 0f bf 16
+    [-,%rdx]            v105 = sload16.i32 v2   ; bin: heap_oob 0f bf 16
    ; asm: movzbl (%ecx), %edi
-    [-,%rdi]            v106 = uload8.i32 v1    ; bin: 0f b6 39
+    [-,%rdi]            v106 = uload8.i32 v1    ; bin: heap_oob 0f b6 39
    ; asm: movzbl (%esi), %edx
-    [-,%rdx]            v107 = uload8.i32 v2    ; bin: 0f b6 16
+    [-,%rdx]            v107 = uload8.i32 v2    ; bin: heap_oob 0f b6 16
    ; asm: movsbl (%ecx), %edi
-    [-,%rdi]            v108 = sload8.i32 v1    ; bin: 0f be 39
+    [-,%rdi]            v108 = sload8.i32 v1    ; bin: heap_oob 0f be 39
    ; asm: movsbl (%esi), %edx
-    [-,%rdx]            v109 = sload8.i32 v2    ; bin: 0f be 16
+    [-,%rdx]            v109 = sload8.i32 v2    ; bin: heap_oob 0f be 16

    ; Register-indirect with 8-bit signed displacement.

    ; asm: movl %ecx, 100(%esi)
-    store v1, v2+100                            ; bin: 89 4e 64
+    store v1, v2+100                            ; bin: heap_oob 89 4e 64
    ; asm: movl %esi, -100(%ecx)
-    store v2, v1-100                            ; bin: 89 71 9c
+    store v2, v1-100                            ; bin: heap_oob 89 71 9c
    ; asm: movw %cx, 100(%esi)
-    istore16 v1, v2+100                         ; bin: 66 89 4e 64
+    istore16 v1, v2+100                         ; bin: heap_oob 66 89 4e 64
    ; asm: movw %si, -100(%ecx)
-    istore16 v2, v1-100                         ; bin: 66 89 71 9c
+    istore16 v2, v1-100                         ; bin: heap_oob 66 89 71 9c
    ; asm: movb %cl, 100(%esi)
-    istore8 v1, v2+100                          ; bin: 88 4e 64
+    istore8 v1, v2+100                          ; bin: heap_oob 88 4e 64

    ; asm: movl 50(%ecx), %edi
-    [-,%rdi]            v110 = load.i32 v1+50           ; bin: 8b 79 32
+    [-,%rdi]            v110 = load.i32 v1+50           ; bin: heap_oob 8b 79 32
    ; asm: movl -50(%esi), %edx
-    [-,%rdx]            v111 = load.i32 v2-50           ; bin: 8b 56 ce
+    [-,%rdx]            v111 = load.i32 v2-50           ; bin: heap_oob 8b 56 ce
    ; asm: movzwl 50(%ecx), %edi
-    [-,%rdi]            v112 = uload16.i32 v1+50        ; bin: 0f b7 79 32
+    [-,%rdi]            v112 = uload16.i32 v1+50        ; bin: heap_oob 0f b7 79 32
    ; asm: movzwl -50(%esi), %edx
-    [-,%rdx]            v113 = uload16.i32 v2-50        ; bin: 0f b7 56 ce
+    [-,%rdx]            v113 = uload16.i32 v2-50        ; bin: heap_oob 0f b7 56 ce
    ; asm: movswl 50(%ecx), %edi
-    [-,%rdi]            v114 = sload16.i32 v1+50        ; bin: 0f bf 79 32
+    [-,%rdi]            v114 = sload16.i32 v1+50        ; bin: heap_oob 0f bf 79 32
    ; asm: movswl -50(%esi), %edx
-    [-,%rdx]            v115 = sload16.i32 v2-50        ; bin: 0f bf 56 ce
+    [-,%rdx]            v115 = sload16.i32 v2-50        ; bin: heap_oob 0f bf 56 ce
    ; asm: movzbl 50(%ecx), %edi
-    [-,%rdi]            v116 = uload8.i32 v1+50         ; bin: 0f b6 79 32
+    [-,%rdi]            v116 = uload8.i32 v1+50         ; bin: heap_oob 0f b6 79 32
    ; asm: movzbl -50(%esi), %edx
-    [-,%rdx]            v117 = uload8.i32 v2-50         ; bin: 0f b6 56 ce
+    [-,%rdx]            v117 = uload8.i32 v2-50         ; bin: heap_oob 0f b6 56 ce
    ; asm: movsbl 50(%ecx), %edi
-    [-,%rdi]            v118 = sload8.i32 v1+50         ; bin: 0f be 79 32
+    [-,%rdi]            v118 = sload8.i32 v1+50         ; bin: heap_oob 0f be 79 32
    ; asm: movsbl -50(%esi), %edx
-    [-,%rdx]            v119 = sload8.i32 v2-50         ; bin: 0f be 56 ce
+    [-,%rdx]            v119 = sload8.i32 v2-50         ; bin: heap_oob 0f be 56 ce

    ; Register-indirect with 32-bit signed displacement.

    ; asm: movl %ecx, 10000(%esi)
-    store v1, v2+10000                          ; bin: 89 8e 00002710
+    store v1, v2+10000                          ; bin: heap_oob 89 8e 00002710
    ; asm: movl %esi, -10000(%ecx)
-    store v2, v1-10000                          ; bin: 89 b1 ffffd8f0
+    store v2, v1-10000                          ; bin: heap_oob 89 b1 ffffd8f0
    ; asm: movw %cx, 10000(%esi)
-    istore16 v1, v2+10000                       ; bin: 66 89 8e 00002710
+    istore16 v1, v2+10000                       ; bin: heap_oob 66 89 8e 00002710
    ; asm: movw %si, -10000(%ecx)
-    istore16 v2, v1-10000                       ; bin: 66 89 b1 ffffd8f0
+    istore16 v2, v1-10000                       ; bin: heap_oob 66 89 b1 ffffd8f0
    ; asm: movb %cl, 10000(%esi)
-    istore8 v1, v2+10000                        ; bin: 88 8e 00002710
+    istore8 v1, v2+10000                        ; bin: heap_oob 88 8e 00002710

    ; asm: movl 50000(%ecx), %edi
-    [-,%rdi]            v120 = load.i32 v1+50000           ; bin: 8b b9 0000c350
+    [-,%rdi]            v120 = load.i32 v1+50000           ; bin: heap_oob 8b b9 0000c350
    ; asm: movl -50000(%esi), %edx
-    [-,%rdx]            v121 = load.i32 v2-50000           ; bin: 8b 96 ffff3cb0
+    [-,%rdx]            v121 = load.i32 v2-50000           ; bin: heap_oob 8b 96 ffff3cb0
    ; asm: movzwl 50000(%ecx), %edi
-    [-,%rdi]            v122 = uload16.i32 v1+50000        ; bin: 0f b7 b9 0000c350
+    [-,%rdi]            v122 = uload16.i32 v1+50000        ; bin: heap_oob 0f b7 b9 0000c350
    ; asm: movzwl -50000(%esi), %edx
-    [-,%rdx]            v123 = uload16.i32 v2-50000        ; bin: 0f b7 96 ffff3cb0
+    [-,%rdx]            v123 = uload16.i32 v2-50000        ; bin: heap_oob 0f b7 96 ffff3cb0
    ; asm: movswl 50000(%ecx), %edi
-    [-,%rdi]            v124 = sload16.i32 v1+50000        ; bin: 0f bf b9 0000c350
+    [-,%rdi]            v124 = sload16.i32 v1+50000        ; bin: heap_oob 0f bf b9 0000c350
    ; asm: movswl -50000(%esi), %edx
-    [-,%rdx]            v125 = sload16.i32 v2-50000        ; bin: 0f bf 96 ffff3cb0
+    [-,%rdx]            v125 = sload16.i32 v2-50000        ; bin: heap_oob 0f bf 96 ffff3cb0
    ; asm: movzbl 50000(%ecx), %edi
-    [-,%rdi]            v126 = uload8.i32 v1+50000         ; bin: 0f b6 b9 0000c350
+    [-,%rdi]            v126 = uload8.i32 v1+50000         ; bin: heap_oob 0f b6 b9 0000c350
    ; asm: movzbl -50000(%esi), %edx
-    [-,%rdx]            v127 = uload8.i32 v2-50000         ; bin: 0f b6 96 ffff3cb0
+    [-,%rdx]            v127 = uload8.i32 v2-50000         ; bin: heap_oob 0f b6 96 ffff3cb0
    ; asm: movsbl 50000(%ecx), %edi
-    [-,%rdi]            v128 = sload8.i32 v1+50000         ; bin: 0f be b9 0000c350
+    [-,%rdi]            v128 = sload8.i32 v1+50000         ; bin: heap_oob 0f be b9 0000c350
    ; asm: movsbl -50000(%esi), %edx
-    [-,%rdx]            v129 = sload8.i32 v2-50000         ; bin: 0f be 96 ffff3cb0
+    [-,%rdx]            v129 = sload8.i32 v2-50000         ; bin: heap_oob 0f be 96 ffff3cb0

    ; Bit-counting instructions.

@@ -403,6 +406,13 @@ ebb0:
    ; asm: addl $-2147483648, %esp
    adjust_sp_imm -2147483648                   ; bin: 81 c4 80000000

+    ; Shift immediates
+    ; asm: shll $2, %esi
+    [-,%rsi]             v513 = ishl_imm v2, 2    ; bin: c1 e6 02
+    ; asm: sarl $5, %esi
+    [-,%rsi]             v514 = sshr_imm v2, 5    ; bin: c1 fe 05
+    ; asm: shrl $8, %esi
+    [-,%rsi]             v515 = ushr_imm v2, 8    ; bin: c1 ee 08

    ; asm: testl %ecx, %ecx
    ; asm: je ebb1
@@ -427,7 +437,7 @@ ebb1:

    ; asm: ebb2:
 ebb2:
-    trap user0                                  ; bin: 0f 0b
+    trap user0                                  ; bin: user0 0f 0b
 }

 ; Special branch encodings only for I32 mode.
@@ -466,9 +476,9 @@ ebb0:

 ebb1:
    ; asm: cmpl %esi, %ecx
-    [-,%eflags]         v10 = ifcmp v1, v2      ; bin: 39 f1
+    [-,%rflags]         v10 = ifcmp v1, v2      ; bin: 39 f1
    ; asm: cmpl %ecx, %esi
-    [-,%eflags]         v11 = ifcmp v2, v1      ; bin: 39 ce
+    [-,%rflags]         v11 = ifcmp v2, v1      ; bin: 39 ce

    ; asm: je ebb1
    brif eq v11, ebb1                           ; bin: 74 fa
@@ -514,41 +524,41 @@ ebb1:

    ; The trapif instructions are encoded as macros: a conditional jump over a ud2.
    ; asm: jne .+4; ud2
-    trapif eq v11, user0                           ; bin: 75 02 0f 0b
+    trapif eq v11, user0                           ; bin: 75 02 user0 0f 0b
    ; asm: je .+4; ud2
-    trapif ne v11, user0                           ; bin: 74 02 0f 0b
+    trapif ne v11, user0                           ; bin: 74 02 user0 0f 0b
    ; asm: jnl .+4; ud2
-    trapif slt v11, user0                          ; bin: 7d 02 0f 0b
+    trapif slt v11, user0                          ; bin: 7d 02 user0 0f 0b
    ; asm: jnge .+4; ud2
-    trapif sge v11, user0                          ; bin: 7c 02 0f 0b
+    trapif sge v11, user0                          ; bin: 7c 02 user0 0f 0b
    ; asm: jng .+4; ud2
-    trapif sgt v11, user0                          ; bin: 7e 02 0f 0b
+    trapif sgt v11, user0                          ; bin: 7e 02 user0 0f 0b
    ; asm: jnle .+4; ud2
-    trapif sle v11, user0                          ; bin: 7f 02 0f 0b
+    trapif sle v11, user0                          ; bin: 7f 02 user0 0f 0b
    ; asm: jnb .+4; ud2
-    trapif ult v11, user0                          ; bin: 73 02 0f 0b
+    trapif ult v11, user0                          ; bin: 73 02 user0 0f 0b
    ; asm: jnae .+4; ud2
-    trapif uge v11, user0                          ; bin: 72 02 0f 0b
+    trapif uge v11, user0                          ; bin: 72 02 user0 0f 0b
    ; asm: jna .+4; ud2
-    trapif ugt v11, user0                          ; bin: 76 02 0f 0b
+    trapif ugt v11, user0                          ; bin: 76 02 user0 0f 0b
    ; asm: jnbe .+4; ud2
-    trapif ule v11, user0                          ; bin: 77 02 0f 0b
+    trapif ule v11, user0                          ; bin: 77 02 user0 0f 0b

    ; Stack check.
    ; asm: cmpl %esp, %ecx
-    [-,%eflags]         v40 = ifcmp_sp v1       ; bin: 39 e1
+    [-,%rflags]         v40 = ifcmp_sp v1       ; bin: 39 e1
    ; asm: cmpl %esp, %esi
-    [-,%eflags]         v41 = ifcmp_sp v2       ; bin: 39 e6
+    [-,%rflags]         v41 = ifcmp_sp v2       ; bin: 39 e6

    ; asm: cmpl $-100, %ecx
-    [-,%eflags]         v42 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
+    [-,%rflags]         v42 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
    ; asm: cmpl $100, %esi
-    [-,%eflags]         v43 = ifcmp_imm v2, 100    ; bin: 83 fe 64
+    [-,%rflags]         v43 = ifcmp_imm v2, 100    ; bin: 83 fe 64

    ; asm: cmpl $-10000, %ecx
-    [-,%eflags]         v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
+    [-,%rflags]         v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
    ; asm: cmpl $10000, %esi
-    [-,%eflags]         v45 = ifcmp_imm v2, 10000  ; bin: 81 fe 00002710
+    [-,%rflags]         v45 = ifcmp_imm v2, 10000  ; bin: 81 fe 00002710

    return
 }
@@ -566,7 +576,7 @@ ebb0:
    ; asm: movzbl %cl, %esi
    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b6 f1

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i32/i16 conversion instructions.
@@ -582,5 +592,5 @@ ebb0:
    ; asm: movzwl %cx, %esi
    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b7 f1

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }
--- a/cranelift/filetests/isa/intel/binary64-float.cton
+++ b/cranelift/filetests/isa/intel/binary64-float.cton
@@ -157,52 +157,52 @@ ebb0:

    ; Load/Store

-    ; asm: movd (%r14), %xmm5
-    [-,%xmm5]           v100 = load.f32 v3                      ; bin: 66 41 0f 6e 2e
-    ; asm: movd (%rax), %xmm10
-    [-,%xmm10]          v101 = load.f32 v2                      ; bin: 66 44 0f 6e 10
-    ; asm: movd 50(%r14), %xmm5
-    [-,%xmm5]           v110 = load.f32 v3+50                   ; bin: 66 41 0f 6e 6e 32
-    ; asm: movd -50(%rax), %xmm10
-    [-,%xmm10]          v111 = load.f32 v2-50                   ; bin: 66 44 0f 6e 50 ce
-    ; asm: movd 10000(%r14), %xmm5
-    [-,%xmm5]           v120 = load.f32 v3+10000                ; bin: 66 41 0f 6e ae 00002710
-    ; asm: movd -10000(%rax), %xmm10
-    [-,%xmm10]          v121 = load.f32 v2-10000                ; bin: 66 44 0f 6e 90 ffffd8f0
+    ; asm: movss (%r14), %xmm5
+    [-,%xmm5]           v100 = load.f32 v3                      ; bin: heap_oob f3 41 0f 10 2e
+    ; asm: movss (%rax), %xmm10
+    [-,%xmm10]          v101 = load.f32 v2                      ; bin: heap_oob f3 44 0f 10 10
+    ; asm: movss 50(%r14), %xmm5
+    [-,%xmm5]           v110 = load.f32 v3+50                   ; bin: heap_oob f3 41 0f 10 6e 32
+    ; asm: movss -50(%rax), %xmm10
+    [-,%xmm10]          v111 = load.f32 v2-50                   ; bin: heap_oob f3 44 0f 10 50 ce
+    ; asm: movss 10000(%r14), %xmm5
+    [-,%xmm5]           v120 = load.f32 v3+10000                ; bin: heap_oob f3 41 0f 10 ae 00002710
+    ; asm: movss -10000(%rax), %xmm10
+    [-,%xmm10]          v121 = load.f32 v2-10000                ; bin: heap_oob f3 44 0f 10 90 ffffd8f0

-    ; asm: movd %xmm5, (%r14)
-    [-]                 store.f32 v100, v3                      ; bin: 66 41 0f 7e 2e
-    ; asm: movd %xmm10, (%rax)
-    [-]                 store.f32 v101, v2                      ; bin: 66 44 0f 7e 10
-    ; asm: movd %xmm5, (%r13)
-    [-]                 store.f32 v100, v4                      ; bin: 66 41 0f 7e 6d 00
-    ; asm: movd %xmm10, (%r13)
-    [-]                 store.f32 v101, v4                      ; bin: 66 45 0f 7e 55 00
-    ; asm: movd %xmm5, 50(%r14)
-    [-]                 store.f32 v100, v3+50                   ; bin: 66 41 0f 7e 6e 32
-    ; asm: movd %xmm10, -50(%rax)
-    [-]                 store.f32 v101, v2-50                   ; bin: 66 44 0f 7e 50 ce
-    ; asm: movd %xmm5, 10000(%r14)
-    [-]                 store.f32 v100, v3+10000                ; bin: 66 41 0f 7e ae 00002710
-    ; asm: movd %xmm10, -10000(%rax)
-    [-]                 store.f32 v101, v2-10000                ; bin: 66 44 0f 7e 90 ffffd8f0
+    ; asm: movss %xmm5, (%r14)
+    [-]                 store.f32 v100, v3                      ; bin: heap_oob f3 41 0f 11 2e
+    ; asm: movss %xmm10, (%rax)
+    [-]                 store.f32 v101, v2                      ; bin: heap_oob f3 44 0f 11 10
+    ; asm: movss %xmm5, (%r13)
+    [-]                 store.f32 v100, v4                      ; bin: heap_oob f3 41 0f 11 6d 00
+    ; asm: movss %xmm10, (%r13)
+    [-]                 store.f32 v101, v4                      ; bin: heap_oob f3 45 0f 11 55 00
+    ; asm: movss %xmm5, 50(%r14)
+    [-]                 store.f32 v100, v3+50                   ; bin: heap_oob f3 41 0f 11 6e 32
+    ; asm: movss %xmm10, -50(%rax)
+    [-]                 store.f32 v101, v2-50                   ; bin: heap_oob f3 44 0f 11 50 ce
+    ; asm: movss %xmm5, 10000(%r14)
+    [-]                 store.f32 v100, v3+10000                ; bin: heap_oob f3 41 0f 11 ae 00002710
+    ; asm: movss %xmm10, -10000(%rax)
+    [-]                 store.f32 v101, v2-10000                ; bin: heap_oob f3 44 0f 11 90 ffffd8f0

    ; Spill / Fill.

-    ; asm: movd %xmm5, 1032(%rsp)
-    [-,ss1]             v200 = spill v100                       ; bin: 66 0f 7e ac 24 00000408
-    ; asm: movd %xmm10, 1032(%rsp)
-    [-,ss1]             v201 = spill v101                       ; bin: 66 44 0f 7e 94 24 00000408
+    ; asm: movss %xmm5, 1032(%rsp)
+    [-,ss1]             v200 = spill v100                       ; bin: f3 0f 11 ac 24 00000408
+    ; asm: movss %xmm10, 1032(%rsp)
+    [-,ss1]             v201 = spill v101                       ; bin: f3 44 0f 11 94 24 00000408

-    ; asm: movd 1032(%rsp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: 66 0f 6e ac 24 00000408
-    ; asm: movd 1032(%rsp), %xmm10
-    [-,%xmm10]          v211 = fill v201                        ; bin: 66 44 0f 6e 94 24 00000408
+    ; asm: movss 1032(%rsp), %xmm5
+    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 10 ac 24 00000408
+    ; asm: movss 1032(%rsp), %xmm10
+    [-,%xmm10]          v211 = fill v201                        ; bin: f3 44 0f 10 94 24 00000408

-    ; asm: movd %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: 66 0f 7e ac 24 00000408
-    ; asm: movd 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: 66 0f 6e ac 24 00000408
+    ; asm: movss %xmm5, 1032(%rsp)
+    regspill v100, %xmm5 -> ss1                                 ; bin: f3 0f 11 ac 24 00000408
+    ; asm: movss 1032(%rsp), %xmm5
+    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 10 ac 24 00000408

    ; Comparisons.
    ;
@@ -235,11 +235,11 @@ ebb0:
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 44 0f 2e d5 0f 96 c2

    ; asm: ucomiss %xmm10, %xmm5
-    [-,%eflags]         v310 = ffcmp v10, v11                   ; bin: 41 0f 2e ea
+    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 41 0f 2e ea
    ; asm: ucomiss %xmm10, %xmm5
-    [-,%eflags]         v311 = ffcmp v11, v10                   ; bin: 44 0f 2e d5
+    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 44 0f 2e d5
    ; asm: ucomiss %xmm5, %xmm5
-    [-,%eflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed
+    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed

    return
 }
@@ -392,52 +392,52 @@ ebb0:

    ; Load/Store

-    ; asm: movq (%r14), %xmm5
-    [-,%xmm5]           v100 = load.f64 v3                      ; bin: f3 41 0f 7e 2e
-    ; asm: movq (%rax), %xmm10
-    [-,%xmm10]          v101 = load.f64 v2                      ; bin: f3 44 0f 7e 10
-    ; asm: movq 50(%r14), %xmm5
-    [-,%xmm5]           v110 = load.f64 v3+50                   ; bin: f3 41 0f 7e 6e 32
-    ; asm: movq -50(%rax), %xmm10
-    [-,%xmm10]          v111 = load.f64 v2-50                   ; bin: f3 44 0f 7e 50 ce
-    ; asm: movq 10000(%r14), %xmm5
-    [-,%xmm5]           v120 = load.f64 v3+10000                ; bin: f3 41 0f 7e ae 00002710
-    ; asm: movq -10000(%rax), %xmm10
-    [-,%xmm10]          v121 = load.f64 v2-10000                ; bin: f3 44 0f 7e 90 ffffd8f0
+    ; asm: movsd (%r14), %xmm5
+    [-,%xmm5]           v100 = load.f64 v3                      ; bin: heap_oob f2 41 0f 10 2e
+    ; asm: movsd (%rax), %xmm10
+    [-,%xmm10]          v101 = load.f64 v2                      ; bin: heap_oob f2 44 0f 10 10
+    ; asm: movsd 50(%r14), %xmm5
+    [-,%xmm5]           v110 = load.f64 v3+50                   ; bin: heap_oob f2 41 0f 10 6e 32
+    ; asm: movsd -50(%rax), %xmm10
+    [-,%xmm10]          v111 = load.f64 v2-50                   ; bin: heap_oob f2 44 0f 10 50 ce
+    ; asm: movsd 10000(%r14), %xmm5
+    [-,%xmm5]           v120 = load.f64 v3+10000                ; bin: heap_oob f2 41 0f 10 ae 00002710
+    ; asm: movsd -10000(%rax), %xmm10
+    [-,%xmm10]          v121 = load.f64 v2-10000                ; bin: heap_oob f2 44 0f 10 90 ffffd8f0

-    ; asm: movq %xmm5, (%r14)
-    [-]                 store.f64 v100, v3                      ; bin: 66 41 0f d6 2e
-    ; asm: movq %xmm10, (%rax)
-    [-]                 store.f64 v101, v2                      ; bin: 66 44 0f d6 10
-    ; asm: movq %xmm5, (%r13)
-    [-]                 store.f64 v100, v4                      ; bin: 66 41 0f d6 6d 00
-    ; asm: movq %xmm10, (%r13)
-    [-]                 store.f64 v101, v4                      ; bin: 66 45 0f d6 55 00
-    ; asm: movq %xmm5, 50(%r14)
-    [-]                 store.f64 v100, v3+50                   ; bin: 66 41 0f d6 6e 32
-    ; asm: movq %xmm10, -50(%rax)
-    [-]                 store.f64 v101, v2-50                   ; bin: 66 44 0f d6 50 ce
-    ; asm: movq %xmm5, 10000(%r14)
-    [-]                 store.f64 v100, v3+10000                ; bin: 66 41 0f d6 ae 00002710
-    ; asm: movq %xmm10, -10000(%rax)
-    [-]                 store.f64 v101, v2-10000                ; bin: 66 44 0f d6 90 ffffd8f0
+    ; asm: movsd %xmm5, (%r14)
+    [-]                 store.f64 v100, v3                      ; bin: heap_oob f2 41 0f 11 2e
+    ; asm: movsd %xmm10, (%rax)
+    [-]                 store.f64 v101, v2                      ; bin: heap_oob f2 44 0f 11 10
+    ; asm: movsd %xmm5, (%r13)
+    [-]                 store.f64 v100, v4                      ; bin: heap_oob f2 41 0f 11 6d 00
+    ; asm: movsd %xmm10, (%r13)
+    [-]                 store.f64 v101, v4                      ; bin: heap_oob f2 45 0f 11 55 00
+    ; asm: movsd %xmm5, 50(%r14)
+    [-]                 store.f64 v100, v3+50                   ; bin: heap_oob f2 41 0f 11 6e 32
+    ; asm: movsd %xmm10, -50(%rax)
+    [-]                 store.f64 v101, v2-50                   ; bin: heap_oob f2 44 0f 11 50 ce
+    ; asm: movsd %xmm5, 10000(%r14)
+    [-]                 store.f64 v100, v3+10000                ; bin: heap_oob f2 41 0f 11 ae 00002710
+    ; asm: movsd %xmm10, -10000(%rax)
+    [-]                 store.f64 v101, v2-10000                ; bin: heap_oob f2 44 0f 11 90 ffffd8f0

    ; Spill / Fill.

-    ; asm: movq %xmm5, 1032(%rsp)
-    [-,ss1]             v200 = spill v100                       ; bin: 66 0f d6 ac 24 00000408
-    ; asm: movq %xmm10, 1032(%rsp)
-    [-,ss1]             v201 = spill v101                       ; bin: 66 44 0f d6 94 24 00000408
+    ; asm: movsd %xmm5, 1032(%rsp)
+    [-,ss1]             v200 = spill v100                       ; bin: f2 0f 11 ac 24 00000408
+    ; asm: movsd %xmm10, 1032(%rsp)
+    [-,ss1]             v201 = spill v101                       ; bin: f2 44 0f 11 94 24 00000408

-    ; asm: movq 1032(%rsp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 7e ac 24 00000408
-    ; asm: movq 1032(%rsp), %xmm10
-    [-,%xmm10]          v211 = fill v201                        ; bin: f3 44 0f 7e 94 24 00000408
+    ; asm: movsd 1032(%rsp), %xmm5
+    [-,%xmm5]           v210 = fill v200                        ; bin: f2 0f 10 ac 24 00000408
+    ; asm: movsd 1032(%rsp), %xmm10
+    [-,%xmm10]          v211 = fill v201                        ; bin: f2 44 0f 10 94 24 00000408

-    ; asm: movq %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: 66 0f d6 ac 24 00000408
-    ; asm: movq 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 7e ac 24 00000408
+    ; asm: movsd %xmm5, 1032(%rsp)
+    regspill v100, %xmm5 -> ss1                                 ; bin: f2 0f 11 ac 24 00000408
+    ; asm: movsd 1032(%rsp), %xmm5
+    regfill v100, ss1 -> %xmm5                                  ; bin: f2 0f 10 ac 24 00000408

    ; Comparisons.
    ;
@@ -470,11 +470,11 @@ ebb0:
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 66 44 0f 2e d5 0f 96 c2

    ; asm: ucomisd %xmm10, %xmm5
-    [-,%eflags]         v310 = ffcmp v10, v11                   ; bin: 66 41 0f 2e ea
+    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 66 41 0f 2e ea
    ; asm: ucomisd %xmm5, %xmm10
-    [-,%eflags]         v311 = ffcmp v11, v10                   ; bin: 66 44 0f 2e d5
+    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 66 44 0f 2e d5
    ; asm: ucomisd %xmm5, %xmm5
-    [-,%eflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed
+    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed

    return
 }
@@ -482,7 +482,7 @@ ebb0:
 function %cpuflags_float(f32 [%xmm0]) {
 ebb0(v0: f32 [%xmm0]):
    ; asm: ucomiss %xmm0, %xmm0
-    [-,%eflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0
+    [-,%rflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0

    jump ebb1

@@ -505,21 +505,21 @@ ebb1:
    brff ule v1, ebb1                                           ; bin: 76 f0

    ; asm: jp .+4; ud2
-    trapff ord v1, user0                                        ; bin: 7a 02 0f 0b
+    trapff ord v1, user0                                        ; bin: 7a 02 user0 0f 0b
    ; asm: jnp .+4; ud2
-    trapff uno v1, user0                                        ; bin: 7b 02 0f 0b
+    trapff uno v1, user0                                        ; bin: 7b 02 user0 0f 0b
    ; asm: je .+4; ud2
-    trapff one v1, user0                                        ; bin: 74 02 0f 0b
+    trapff one v1, user0                                        ; bin: 74 02 user0 0f 0b
    ; asm: jne .+4; ud2
-    trapff ueq v1, user0                                        ; bin: 75 02 0f 0b
+    trapff ueq v1, user0                                        ; bin: 75 02 user0 0f 0b
    ; asm: jna .+4; ud2
-    trapff gt v1, user0                                         ; bin: 76 02 0f 0b
+    trapff gt v1, user0                                         ; bin: 76 02 user0 0f 0b
    ; asm: jnae .+4; ud2
-    trapff ge v1, user0                                         ; bin: 72 02 0f 0b
+    trapff ge v1, user0                                         ; bin: 72 02 user0 0f 0b
    ; asm: jnb .+4; ud2
-    trapff ult v1, user0                                        ; bin: 73 02 0f 0b
+    trapff ult v1, user0                                        ; bin: 73 02 user0 0f 0b
    ; asm: jnbe .+4; ud2
-    trapff ule v1, user0                                        ; bin: 77 02 0f 0b
+    trapff ule v1, user0                                        ; bin: 77 02 user0 0f 0b

    ; asm: setnp %bl
    [-,%rbx]            v10 = trueff ord v1                     ; bin: 0f 9b c3
--- a/cranelift/filetests/isa/intel/binary64.cton
+++ b/cranelift/filetests/isa/intel/binary64.cton
@@ -1,4 +1,4 @@
-; binary emission of 64-bit code.
+; binary emission of x86-64 code.
 test binemit
 set is_64bit
 set is_compressed
@@ -38,6 +38,11 @@ ebb0:
    ; asm: movq $0xffffffff88001122, %r14                     # 32-bit sign-extended constant.
    [-,%r14]            v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122

+    ; asm: movl $1, %ecx
+    [-,%rcx]            v9007 = bconst.b1 true      ; bin: b9 00000001
+    ; asm: movl $1, %r10d
+    [-,%r10]            v9008 = bconst.b1 true      ; bin: 41 ba 00000001
+
    ; Integer Register-Register Operations.

    ; asm: addq %rsi, %rcx
@@ -170,146 +175,146 @@ ebb0:
    ; Register indirect addressing with no displacement.

    ; asm: movq %rcx, (%r10)
-    store v1, v3                                ; bin: 49 89 0a
+    store v1, v3                                ; bin: heap_oob 49 89 0a
    ; asm: movq %r10, (%rcx)
-    store v3, v1                                ; bin: 4c 89 11
+    store v3, v1                                ; bin: heap_oob 4c 89 11
    ; asm: movl %ecx, (%r10)
-    istore32 v1, v3                             ; bin: 41 89 0a
+    istore32 v1, v3                             ; bin: heap_oob 41 89 0a
    ; asm: movl %r10d, (%rcx)
-    istore32 v3, v1                             ; bin: 44 89 11
+    istore32 v3, v1                             ; bin: heap_oob 44 89 11
    ; asm: movw %cx, (%r10)
-    istore16 v1, v3                             ; bin: 66 41 89 0a
+    istore16 v1, v3                             ; bin: heap_oob 66 41 89 0a
    ; asm: movw %r10w, (%rcx)
-    istore16 v3, v1                             ; bin: 66 44 89 11
+    istore16 v3, v1                             ; bin: heap_oob 66 44 89 11
    ; asm: movb %cl, (%r10)
-    istore8 v1, v3                              ; bin: 41 88 0a
+    istore8 v1, v3                              ; bin: heap_oob 41 88 0a
    ; asm: movb %r10b, (%rcx)
-    istore8 v3, v1                              ; bin: 44 88 11
+    istore8 v3, v1                              ; bin: heap_oob 44 88 11

    ; asm: movq (%rcx), %r14
-    [-,%r14]            v120 = load.i64 v1      ; bin: 4c 8b 31
+    [-,%r14]            v120 = load.i64 v1      ; bin: heap_oob 4c 8b 31
    ; asm: movq (%r10), %rdx
-    [-,%rdx]            v121 = load.i64 v3      ; bin: 49 8b 12
+    [-,%rdx]            v121 = load.i64 v3      ; bin: heap_oob 49 8b 12
    ; asm: movl (%rcx), %r14d
-    [-,%r14]            v122 = uload32.i64 v1   ; bin: 44 8b 31
+    [-,%r14]            v122 = uload32.i64 v1   ; bin: heap_oob 44 8b 31
    ; asm: movl (%r10), %edx
-    [-,%rdx]            v123 = uload32.i64 v3   ; bin: 41 8b 12
+    [-,%rdx]            v123 = uload32.i64 v3   ; bin: heap_oob 41 8b 12
    ; asm: movslq (%rcx), %r14
-    [-,%r14]            v124 = sload32.i64 v1   ; bin: 4c 63 31
+    [-,%r14]            v124 = sload32.i64 v1   ; bin: heap_oob 4c 63 31
    ; asm: movslq (%r10), %rdx
-    [-,%rdx]            v125 = sload32.i64 v3   ; bin: 49 63 12
+    [-,%rdx]            v125 = sload32.i64 v3   ; bin: heap_oob 49 63 12
    ; asm: movzwq (%rcx), %r14
-    [-,%r14]            v126 = uload16.i64 v1   ; bin: 4c 0f b7 31
+    [-,%r14]            v126 = uload16.i64 v1   ; bin: heap_oob 4c 0f b7 31
    ; asm: movzwq (%r10), %rdx
-    [-,%rdx]            v127 = uload16.i64 v3   ; bin: 49 0f b7 12
+    [-,%rdx]            v127 = uload16.i64 v3   ; bin: heap_oob 49 0f b7 12
    ; asm: movswq (%rcx), %r14
-    [-,%r14]            v128 = sload16.i64 v1   ; bin: 4c 0f bf 31
+    [-,%r14]            v128 = sload16.i64 v1   ; bin: heap_oob 4c 0f bf 31
    ; asm: movswq (%r10), %rdx
-    [-,%rdx]            v129 = sload16.i64 v3   ; bin: 49 0f bf 12
+    [-,%rdx]            v129 = sload16.i64 v3   ; bin: heap_oob 49 0f bf 12
    ; asm: movzbq (%rcx), %r14
-    [-,%r14]            v130 = uload8.i64 v1    ; bin: 4c 0f b6 31
+    [-,%r14]            v130 = uload8.i64 v1    ; bin: heap_oob 4c 0f b6 31
    ; asm: movzbq (%r10), %rdx
-    [-,%rdx]            v131 = uload8.i64 v3    ; bin: 49 0f b6 12
+    [-,%rdx]            v131 = uload8.i64 v3    ; bin: heap_oob 49 0f b6 12
    ; asm: movsbq (%rcx), %r14
-    [-,%r14]            v132 = sload8.i64 v1    ; bin: 4c 0f be 31
+    [-,%r14]            v132 = sload8.i64 v1    ; bin: heap_oob 4c 0f be 31
    ; asm: movsbq (%r10), %rdx
-    [-,%rdx]            v133 = sload8.i64 v3    ; bin: 49 0f be 12
+    [-,%rdx]            v133 = sload8.i64 v3    ; bin: heap_oob 49 0f be 12

    ; Register-indirect with 8-bit signed displacement.

    ; asm: movq %rcx, 100(%r10)
-    store v1, v3+100                            ; bin: 49 89 4a 64
+    store v1, v3+100                            ; bin: heap_oob 49 89 4a 64
    ; asm: movq %r10, -100(%rcx)
-    store v3, v1-100                            ; bin: 4c 89 51 9c
+    store v3, v1-100                            ; bin: heap_oob 4c 89 51 9c
    ; asm: movl %ecx, 100(%r10)
-    istore32 v1, v3+100                         ; bin: 41 89 4a 64
+    istore32 v1, v3+100                         ; bin: heap_oob 41 89 4a 64
    ; asm: movl %r10d, -100(%rcx)
-    istore32 v3, v1-100                         ; bin: 44 89 51 9c
+    istore32 v3, v1-100                         ; bin: heap_oob 44 89 51 9c
    ; asm: movw %cx, 100(%r10)
-    istore16 v1, v3+100                         ; bin: 66 41 89 4a 64
+    istore16 v1, v3+100                         ; bin: heap_oob 66 41 89 4a 64
    ; asm: movw %r10w, -100(%rcx)
-    istore16 v3, v1-100                         ; bin: 66 44 89 51 9c
+    istore16 v3, v1-100                         ; bin: heap_oob 66 44 89 51 9c
    ; asm: movb %cl, 100(%r10)
-    istore8 v1, v3+100                          ; bin: 41 88 4a 64
+    istore8 v1, v3+100                          ; bin: heap_oob 41 88 4a 64
    ; asm: movb %r10b, 100(%rcx)
-    istore8 v3, v1+100                          ; bin: 44 88 51 64
+    istore8 v3, v1+100                          ; bin: heap_oob 44 88 51 64

    ; asm: movq 50(%rcx), %r10
-    [-,%r10]            v140 = load.i64 v1+50           ; bin: 4c 8b 51 32
+    [-,%r10]            v140 = load.i64 v1+50           ; bin: heap_oob 4c 8b 51 32
    ; asm: movq -50(%r10), %rdx
-    [-,%rdx]            v141 = load.i64 v3-50           ; bin: 49 8b 52 ce
+    [-,%rdx]            v141 = load.i64 v3-50           ; bin: heap_oob 49 8b 52 ce
    ; asm: movl 50(%rcx), %edi
-    [-,%rdi]            v142 = uload32.i64 v1+50        ; bin: 8b 79 32
+    [-,%rdi]            v142 = uload32.i64 v1+50        ; bin: heap_oob 8b 79 32
    ; asm: movl -50(%rsi), %edx
-    [-,%rdx]            v143 = uload32.i64 v2-50        ; bin: 8b 56 ce
+    [-,%rdx]            v143 = uload32.i64 v2-50        ; bin: heap_oob 8b 56 ce
    ; asm: movslq 50(%rcx), %rdi
-    [-,%rdi]            v144 = sload32.i64 v1+50        ; bin: 48 63 79 32
+    [-,%rdi]            v144 = sload32.i64 v1+50        ; bin: heap_oob 48 63 79 32
    ; asm: movslq -50(%rsi), %rdx
-    [-,%rdx]            v145 = sload32.i64 v2-50        ; bin: 48 63 56 ce
+    [-,%rdx]            v145 = sload32.i64 v2-50        ; bin: heap_oob 48 63 56 ce
    ; asm: movzwq 50(%rcx), %rdi
-    [-,%rdi]            v146 = uload16.i64 v1+50        ; bin: 48 0f b7 79 32
+    [-,%rdi]            v146 = uload16.i64 v1+50        ; bin: heap_oob 48 0f b7 79 32
    ; asm: movzwq -50(%rsi), %rdx
-    [-,%rdx]            v147 = uload16.i64 v2-50        ; bin: 48 0f b7 56 ce
+    [-,%rdx]            v147 = uload16.i64 v2-50        ; bin: heap_oob 48 0f b7 56 ce
    ; asm: movswq 50(%rcx), %rdi
-    [-,%rdi]            v148 = sload16.i64 v1+50        ; bin: 48 0f bf 79 32
+    [-,%rdi]            v148 = sload16.i64 v1+50        ; bin: heap_oob 48 0f bf 79 32
    ; asm: movswq -50(%rsi), %rdx
-    [-,%rdx]            v149 = sload16.i64 v2-50        ; bin: 48 0f bf 56 ce
+    [-,%rdx]            v149 = sload16.i64 v2-50        ; bin: heap_oob 48 0f bf 56 ce
    ; asm: movzbq 50(%rcx), %rdi
-    [-,%rdi]            v150 = uload8.i64 v1+50         ; bin: 48 0f b6 79 32
+    [-,%rdi]            v150 = uload8.i64 v1+50         ; bin: heap_oob 48 0f b6 79 32
    ; asm: movzbq -50(%rsi), %rdx
-    [-,%rdx]            v151 = uload8.i64 v2-50         ; bin: 48 0f b6 56 ce
+    [-,%rdx]            v151 = uload8.i64 v2-50         ; bin: heap_oob 48 0f b6 56 ce
    ; asm: movsbq 50(%rcx), %rdi
-    [-,%rdi]            v152 = sload8.i64 v1+50         ; bin: 48 0f be 79 32
+    [-,%rdi]            v152 = sload8.i64 v1+50         ; bin: heap_oob 48 0f be 79 32
    ; asm: movsbq -50(%rsi), %rdx
-    [-,%rdx]            v153 = sload8.i64 v2-50         ; bin: 48 0f be 56 ce
+    [-,%rdx]            v153 = sload8.i64 v2-50         ; bin: heap_oob 48 0f be 56 ce

    ; Register-indirect with 32-bit signed displacement.

    ; asm: movq %rcx, 10000(%r10)
-    store v1, v3+10000                          ; bin: 49 89 8a 00002710
+    store v1, v3+10000                          ; bin: heap_oob 49 89 8a 00002710
    ; asm: movq %r10, -10000(%rcx)
-    store v3, v1-10000                          ; bin: 4c 89 91 ffffd8f0
+    store v3, v1-10000                          ; bin: heap_oob 4c 89 91 ffffd8f0
    ; asm: movl %ecx, 10000(%rsi)
-    istore32 v1, v2+10000                       ; bin: 89 8e 00002710
+    istore32 v1, v2+10000                       ; bin: heap_oob 89 8e 00002710
    ; asm: movl %esi, -10000(%rcx)
-    istore32 v2, v1-10000                       ; bin: 89 b1 ffffd8f0
+    istore32 v2, v1-10000                       ; bin: heap_oob 89 b1 ffffd8f0
    ; asm: movw %cx, 10000(%rsi)
-    istore16 v1, v2+10000                       ; bin: 66 89 8e 00002710
+    istore16 v1, v2+10000                       ; bin: heap_oob 66 89 8e 00002710
    ; asm: movw %si, -10000(%rcx)
-    istore16 v2, v1-10000                       ; bin: 66 89 b1 ffffd8f0
+    istore16 v2, v1-10000                       ; bin: heap_oob 66 89 b1 ffffd8f0
    ; asm: movb %cl, 10000(%rsi)
-    istore8 v1, v2+10000                        ; bin: 88 8e 00002710
+    istore8 v1, v2+10000                        ; bin: heap_oob 88 8e 00002710
    ; asm: movb %sil, 10000(%rcx)
-    istore8 v2, v1+10000                        ; bin: 40 88 b1 00002710
+    istore8 v2, v1+10000                        ; bin: heap_oob 40 88 b1 00002710

    ; asm: movq 50000(%rcx), %r10
-    [-,%r10]            v160 = load.i64 v1+50000           ; bin: 4c 8b 91 0000c350
+    [-,%r10]            v160 = load.i64 v1+50000           ; bin: heap_oob 4c 8b 91 0000c350
    ; asm: movq -50000(%r10), %rdx
-    [-,%rdx]            v161 = load.i64 v3-50000           ; bin: 49 8b 92 ffff3cb0
+    [-,%rdx]            v161 = load.i64 v3-50000           ; bin: heap_oob 49 8b 92 ffff3cb0
    ; asm: movl 50000(%rcx), %edi
-    [-,%rdi]            v162 = uload32.i64 v1+50000        ; bin: 8b b9 0000c350
+    [-,%rdi]            v162 = uload32.i64 v1+50000        ; bin: heap_oob 8b b9 0000c350
    ; asm: movl -50000(%rsi), %edx
-    [-,%rdx]            v163 = uload32.i64 v2-50000        ; bin: 8b 96 ffff3cb0
+    [-,%rdx]            v163 = uload32.i64 v2-50000        ; bin: heap_oob 8b 96 ffff3cb0
    ; asm: movslq 50000(%rcx), %rdi
-    [-,%rdi]            v164 = sload32.i64 v1+50000        ; bin: 48 63 b9 0000c350
+    [-,%rdi]            v164 = sload32.i64 v1+50000        ; bin: heap_oob 48 63 b9 0000c350
    ; asm: movslq -50000(%rsi), %rdx
-    [-,%rdx]            v165 = sload32.i64 v2-50000        ; bin: 48 63 96 ffff3cb0
+    [-,%rdx]            v165 = sload32.i64 v2-50000        ; bin: heap_oob 48 63 96 ffff3cb0
    ; asm: movzwq 50000(%rcx), %rdi
-    [-,%rdi]            v166 = uload16.i64 v1+50000        ; bin: 48 0f b7 b9 0000c350
+    [-,%rdi]            v166 = uload16.i64 v1+50000        ; bin: heap_oob 48 0f b7 b9 0000c350
    ; asm: movzwq -50000(%rsi), %rdx
-    [-,%rdx]            v167 = uload16.i64 v2-50000        ; bin: 48 0f b7 96 ffff3cb0
+    [-,%rdx]            v167 = uload16.i64 v2-50000        ; bin: heap_oob 48 0f b7 96 ffff3cb0
    ; asm: movswq 50000(%rcx), %rdi
-    [-,%rdi]            v168 = sload16.i64 v1+50000        ; bin: 48 0f bf b9 0000c350
+    [-,%rdi]            v168 = sload16.i64 v1+50000        ; bin: heap_oob 48 0f bf b9 0000c350
    ; asm: movswq -50000(%rsi), %rdx
-    [-,%rdx]            v169 = sload16.i64 v2-50000        ; bin: 48 0f bf 96 ffff3cb0
+    [-,%rdx]            v169 = sload16.i64 v2-50000        ; bin: heap_oob 48 0f bf 96 ffff3cb0
    ; asm: movzbq 50000(%rcx), %rdi
-    [-,%rdi]            v170 = uload8.i64 v1+50000         ; bin: 48 0f b6 b9 0000c350
+    [-,%rdi]            v170 = uload8.i64 v1+50000         ; bin: heap_oob 48 0f b6 b9 0000c350
    ; asm: movzbq -50000(%rsi), %rdx
-    [-,%rdx]            v171 = uload8.i64 v2-50000         ; bin: 48 0f b6 96 ffff3cb0
+    [-,%rdx]            v171 = uload8.i64 v2-50000         ; bin: heap_oob 48 0f b6 96 ffff3cb0
    ; asm: movsbq 50000(%rcx), %rdi
-    [-,%rdi]            v172 = sload8.i64 v1+50000         ; bin: 48 0f be b9 0000c350
+    [-,%rdi]            v172 = sload8.i64 v1+50000         ; bin: heap_oob 48 0f be b9 0000c350
    ; asm: movsbq -50000(%rsi), %rdx
-    [-,%rdx]            v173 = sload8.i64 v2-50000         ; bin: 48 0f be 96 ffff3cb0
+    [-,%rdx]            v173 = sload8.i64 v2-50000         ; bin: heap_oob 48 0f be 96 ffff3cb0


    ; More arithmetic.
@@ -324,17 +329,17 @@ ebb0:
    [-,%rax]      v190 = iconst.i64 1
    [-,%rdx]      v191 = iconst.i64 2
    ; asm: idivq %rcx
-    [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1  ; bin: 48 f7 f9
+    [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1  ; bin: int_divz 48 f7 f9
    ; asm: idivq %rsi
-    [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2  ; bin: 48 f7 fe
+    [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2  ; bin: int_divz 48 f7 fe
    ; asm: idivq %r10
-    [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3  ; bin: 49 f7 fa
+    [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3  ; bin: int_divz 49 f7 fa
    ; asm: divq %rcx
-    [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1  ; bin: 48 f7 f1
+    [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1  ; bin: int_divz 48 f7 f1
    ; asm: divq %rsi
-    [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2  ; bin: 48 f7 f6
+    [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2  ; bin: int_divz 48 f7 f6
    ; asm: divq %r10
-    [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3  ; bin: 49 f7 f2
+    [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3  ; bin: int_divz 49 f7 f2

    ; double-length multiply instructions, 64 bit
    [-,%rax]       v1001 = iconst.i64 1
@@ -453,6 +458,14 @@ ebb0:
    ; asm: setbe %dl
    [-,%rdx]            v319 = icmp ule v2, v3  ; bin: 4c 39 d6 0f 96 c2

+    ; asm: cmpq $37, %rcx
+    ; asm: setl %bl
+    [-,%rbx]            v320 = icmp_imm slt v1, 37     ; bin: 48 83 f9 25 0f 9c c3
+
+    ; asm: cmpq $100000, %rcx
+    ; asm: setl %bl
+    [-,%rbx]            v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3
+
    ; Bool-to-int conversions.

    ; asm: movzbq %bl, %rcx
@@ -529,6 +542,21 @@ ebb0:
    ; asm: addq $-2147483648, %rsp
    adjust_sp_imm -2147483648                   ; bin: 48 81 c4 80000000

+    ; Shift immediates
+    ; asm: shlq $12, %rsi
+    [-,%rsi]             v515 = ishl_imm v2, 12   ; bin: 48 c1 e6 0c
+    ; asm: shlq $13, %r8
+    [-,%r8]              v516 = ishl_imm v4, 13   ; bin: 49 c1 e0 0d
+    ; asm: sarq $32, %rsi
+    [-,%rsi]             v517 = sshr_imm v2, 32   ; bin: 48 c1 fe 20
+    ; asm: sarq $33, %r8
+    [-,%r8]              v518 = sshr_imm v4, 33   ; bin: 49 c1 f8 21
+    ; asm: shrq $62, %rsi
+    [-,%rsi]             v519 = ushr_imm v2, 62   ; bin: 48 c1 ee 3e
+    ; asm: shrq $63, %r8
+    [-,%r8]              v520 = ushr_imm v4, 63   ; bin: 49 c1 e8 3f
+
+
    ; asm: testq %rcx, %rcx
    ; asm: je ebb1
    brz v1, ebb1                                ; bin: 48 85 c9 74 1b
@@ -569,9 +597,9 @@ ebb0:

 ebb1:
    ; asm: cmpq %r10, %rcx
-    [-,%eflags]         v10 = ifcmp v1, v2      ; bin: 4c 39 d1
+    [-,%rflags]         v10 = ifcmp v1, v2      ; bin: 4c 39 d1
    ; asm: cmpq %rcx, %r10
-    [-,%eflags]         v11 = ifcmp v2, v1      ; bin: 49 39 ca
+    [-,%rflags]         v11 = ifcmp v2, v1      ; bin: 49 39 ca

    ; asm: je ebb1
    brif eq v11, ebb1                           ; bin: 74 f8
@@ -617,41 +645,42 @@ ebb1:

    ; The trapif instructions are encoded as macros: a conditional jump over a ud2.
    ; asm: jne .+4; ud2
-    trapif eq v11, user0                           ; bin: 75 02 0f 0b
+    trapif eq v11, user0                           ; bin: 75 02 user0 0f 0b
    ; asm: je .+4; ud2
-    trapif ne v11, user0                           ; bin: 74 02 0f 0b
+    trapif ne v11, user0                           ; bin: 74 02 user0 0f 0b
    ; asm: jnl .+4; ud2
-    trapif slt v11, user0                          ; bin: 7d 02 0f 0b
+    trapif slt v11, user0                          ; bin: 7d 02 user0 0f 0b
    ; asm: jnge .+4; ud2
-    trapif sge v11, user0                          ; bin: 7c 02 0f 0b
+    trapif sge v11, user0                          ; bin: 7c 02 user0 0f 0b
    ; asm: jng .+4; ud2
-    trapif sgt v11, user0                          ; bin: 7e 02 0f 0b
+    trapif sgt v11, user0                          ; bin: 7e 02 user0 0f 0b
    ; asm: jnle .+4; ud2
-    trapif sle v11, user0                          ; bin: 7f 02 0f 0b
+    trapif sle v11, user0                          ; bin: 7f 02 user0 0f 0b
    ; asm: jnb .+4; ud2
-    trapif ult v11, user0                          ; bin: 73 02 0f 0b
+    trapif ult v11, user0                          ; bin: 73 02 user0 0f 0b
    ; asm: jnae .+4; ud2
-    trapif uge v11, user0                          ; bin: 72 02 0f 0b
+    trapif uge v11, user0                          ; bin: 72 02 user0 0f 0b
    ; asm: jna .+4; ud2
-    trapif ugt v11, user0                          ; bin: 76 02 0f 0b
+    trapif ugt v11, user0                          ; bin: 76 02 user0 0f 0b
    ; asm: jnbe .+4; ud2
-    trapif ule v11, user0                          ; bin: 77 02 0f 0b
+    trapif ule v11, user0                          ; bin: 77 02 user0 0f 0b

    ; Stack check.
    ; asm: cmpq %rsp, %rcx
-    [-,%eflags]         v40 = ifcmp_sp v1       ; bin: 48 39 e1
+    [-,%rflags]         v40 = ifcmp_sp v1       ; bin: 48 39 e1
    ; asm: cmpq %rsp, %r10
-    [-,%eflags]         v41 = ifcmp_sp v2       ; bin: 49 39 e2
+    [-,%rflags]         v41 = ifcmp_sp v2       ; bin: 49 39 e2

    ; asm: cmpq $-100, %rcx
-    [-,%eflags]         v522 = ifcmp_imm v1, -100   ; bin: 48 83 f9 9c
+    [-,%rflags]         v522 = ifcmp_imm v1, -100   ; bin: 48 83 f9 9c
    ; asm: cmpq $100, %r10
-    [-,%eflags]         v523 = ifcmp_imm v2, 100    ; bin: 49 83 fa 64
+    [-,%rflags]         v523 = ifcmp_imm v2, 100    ; bin: 49 83 fa 64

    ; asm: cmpq $-10000, %rcx
-    [-,%eflags]         v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0
+    [-,%rflags]         v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0
    ; asm: cmpq $10000, %r10
-    [-,%eflags]         v525 = ifcmp_imm v2, 10000  ; bin: 49 81 fa 00002710
+    [-,%rflags]         v525 = ifcmp_imm v2, 10000  ; bin: 49 81 fa 00002710
+

    return
 }
@@ -708,71 +737,71 @@ ebb0:
    ; Register indirect addressing with no displacement.

    ; asm: movl (%rcx), %edi
-    [-,%rdi]            v10 = load.i32 v1      ; bin: 8b 39
+    [-,%rdi]            v10 = load.i32 v1      ; bin: heap_oob 8b 39
    ; asm: movl (%rsi), %edx
-    [-,%rdx]            v11 = load.i32 v2      ; bin: 8b 16
+    [-,%rdx]            v11 = load.i32 v2      ; bin: heap_oob 8b 16
    ; asm: movzwl (%rcx), %edi
-    [-,%rdi]            v12 = uload16.i32 v1   ; bin: 0f b7 39
+    [-,%rdi]            v12 = uload16.i32 v1   ; bin: heap_oob 0f b7 39
    ; asm: movzwl (%rsi), %edx
-    [-,%rdx]            v13 = uload16.i32 v2   ; bin: 0f b7 16
+    [-,%rdx]            v13 = uload16.i32 v2   ; bin: heap_oob 0f b7 16
    ; asm: movswl (%rcx), %edi
-    [-,%rdi]            v14 = sload16.i32 v1   ; bin: 0f bf 39
+    [-,%rdi]            v14 = sload16.i32 v1   ; bin: heap_oob 0f bf 39
    ; asm: movswl (%rsi), %edx
-    [-,%rdx]            v15 = sload16.i32 v2   ; bin: 0f bf 16
+    [-,%rdx]            v15 = sload16.i32 v2   ; bin: heap_oob 0f bf 16
    ; asm: movzbl (%rcx), %edi
-    [-,%rdi]            v16 = uload8.i32 v1    ; bin: 0f b6 39
+    [-,%rdi]            v16 = uload8.i32 v1    ; bin: heap_oob 0f b6 39
    ; asm: movzbl (%rsi), %edx
-    [-,%rdx]            v17 = uload8.i32 v2    ; bin: 0f b6 16
+    [-,%rdx]            v17 = uload8.i32 v2    ; bin: heap_oob 0f b6 16
    ; asm: movsbl (%rcx), %edi
-    [-,%rdi]            v18 = sload8.i32 v1    ; bin: 0f be 39
+    [-,%rdi]            v18 = sload8.i32 v1    ; bin: heap_oob 0f be 39
    ; asm: movsbl (%rsi), %edx
-    [-,%rdx]            v19 = sload8.i32 v2    ; bin: 0f be 16
+    [-,%rdx]            v19 = sload8.i32 v2    ; bin: heap_oob 0f be 16

    ; Register-indirect with 8-bit signed displacement.

    ; asm: movl 50(%rcx), %edi
-    [-,%rdi]            v20 = load.i32 v1+50           ; bin: 8b 79 32
+    [-,%rdi]            v20 = load.i32 v1+50           ; bin: heap_oob 8b 79 32
    ; asm: movl -50(%rsi), %edx
-    [-,%rdx]            v21 = load.i32 v2-50           ; bin: 8b 56 ce
+    [-,%rdx]            v21 = load.i32 v2-50           ; bin: heap_oob 8b 56 ce
    ; asm: movzwl 50(%rcx), %edi
-    [-,%rdi]            v22 = uload16.i32 v1+50        ; bin: 0f b7 79 32
+    [-,%rdi]            v22 = uload16.i32 v1+50        ; bin: heap_oob 0f b7 79 32
    ; asm: movzwl -50(%rsi), %edx
-    [-,%rdx]            v23 = uload16.i32 v2-50        ; bin: 0f b7 56 ce
+    [-,%rdx]            v23 = uload16.i32 v2-50        ; bin: heap_oob 0f b7 56 ce
    ; asm: movswl 50(%rcx), %edi
-    [-,%rdi]            v24 = sload16.i32 v1+50        ; bin: 0f bf 79 32
+    [-,%rdi]            v24 = sload16.i32 v1+50        ; bin: heap_oob 0f bf 79 32
    ; asm: movswl -50(%rsi), %edx
-    [-,%rdx]            v25 = sload16.i32 v2-50        ; bin: 0f bf 56 ce
+    [-,%rdx]            v25 = sload16.i32 v2-50        ; bin: heap_oob 0f bf 56 ce
    ; asm: movzbl 50(%rcx), %edi
-    [-,%rdi]            v26 = uload8.i32 v1+50         ; bin: 0f b6 79 32
+    [-,%rdi]            v26 = uload8.i32 v1+50         ; bin: heap_oob 0f b6 79 32
    ; asm: movzbl -50(%rsi), %edx
-    [-,%rdx]            v27 = uload8.i32 v2-50         ; bin: 0f b6 56 ce
+    [-,%rdx]            v27 = uload8.i32 v2-50         ; bin: heap_oob 0f b6 56 ce
    ; asm: movsbl 50(%rcx), %edi
-    [-,%rdi]            v28 = sload8.i32 v1+50         ; bin: 0f be 79 32
+    [-,%rdi]            v28 = sload8.i32 v1+50         ; bin: heap_oob 0f be 79 32
    ; asm: movsbl -50(%rsi), %edx
-    [-,%rdx]            v29 = sload8.i32 v2-50         ; bin: 0f be 56 ce
+    [-,%rdx]            v29 = sload8.i32 v2-50         ; bin: heap_oob 0f be 56 ce

    ; Register-indirect with 32-bit signed displacement.

    ; asm: movl 50000(%rcx), %edi
-    [-,%rdi]            v30 = load.i32 v1+50000           ; bin: 8b b9 0000c350
+    [-,%rdi]            v30 = load.i32 v1+50000           ; bin: heap_oob 8b b9 0000c350
    ; asm: movl -50000(%rsi), %edx
-    [-,%rdx]            v31 = load.i32 v2-50000           ; bin: 8b 96 ffff3cb0
+    [-,%rdx]            v31 = load.i32 v2-50000           ; bin: heap_oob 8b 96 ffff3cb0
    ; asm: movzwl 50000(%rcx), %edi
-    [-,%rdi]            v32 = uload16.i32 v1+50000        ; bin: 0f b7 b9 0000c350
+    [-,%rdi]            v32 = uload16.i32 v1+50000        ; bin: heap_oob 0f b7 b9 0000c350
    ; asm: movzwl -50000(%rsi), %edx
-    [-,%rdx]            v33 = uload16.i32 v2-50000        ; bin: 0f b7 96 ffff3cb0
+    [-,%rdx]            v33 = uload16.i32 v2-50000        ; bin: heap_oob 0f b7 96 ffff3cb0
    ; asm: movswl 50000(%rcx), %edi
-    [-,%rdi]            v34 = sload16.i32 v1+50000        ; bin: 0f bf b9 0000c350
+    [-,%rdi]            v34 = sload16.i32 v1+50000        ; bin: heap_oob 0f bf b9 0000c350
    ; asm: movswl -50000(%rsi), %edx
-    [-,%rdx]            v35 = sload16.i32 v2-50000        ; bin: 0f bf 96 ffff3cb0
+    [-,%rdx]            v35 = sload16.i32 v2-50000        ; bin: heap_oob 0f bf 96 ffff3cb0
    ; asm: movzbl 50000(%rcx), %edi
-    [-,%rdi]            v36 = uload8.i32 v1+50000         ; bin: 0f b6 b9 0000c350
+    [-,%rdi]            v36 = uload8.i32 v1+50000         ; bin: heap_oob 0f b6 b9 0000c350
    ; asm: movzbl -50000(%rsi), %edx
-    [-,%rdx]            v37 = uload8.i32 v2-50000         ; bin: 0f b6 96 ffff3cb0
+    [-,%rdx]            v37 = uload8.i32 v2-50000         ; bin: heap_oob 0f b6 96 ffff3cb0
    ; asm: movsbl 50000(%rcx), %edi
-    [-,%rdi]            v38 = sload8.i32 v1+50000         ; bin: 0f be b9 0000c350
+    [-,%rdi]            v38 = sload8.i32 v1+50000         ; bin: heap_oob 0f be b9 0000c350
    ; asm: movsbl -50000(%rsi), %edx
-    [-,%rdx]            v39 = sload8.i32 v2-50000         ; bin: 0f be 96 ffff3cb0
+    [-,%rdx]            v39 = sload8.i32 v2-50000         ; bin: heap_oob 0f be 96 ffff3cb0

    ; Integer Register-Register Operations.

@@ -903,17 +932,17 @@ ebb0:
    [-,%rax]      v160 = iconst.i32 1
    [-,%rdx]      v161 = iconst.i32 2
    ; asm: idivl %ecx
-    [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1  ; bin: f7 f9
+    [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1  ; bin: int_divz f7 f9
    ; asm: idivl %esi
-    [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2  ; bin: f7 fe
+    [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2  ; bin: int_divz f7 fe
    ; asm: idivl %r10d
-    [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3  ; bin: 41 f7 fa
+    [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3  ; bin: int_divz 41 f7 fa
    ; asm: divl %ecx
-    [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1  ; bin: f7 f1
+    [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1  ; bin: int_divz f7 f1
    ; asm: divl %esi
-    [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2  ; bin: f7 f6
+    [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2  ; bin: int_divz f7 f6
    ; asm: divl %r10d
-    [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3  ; bin: 41 f7 f2
+    [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3  ; bin: int_divz 41 f7 f2

    ; Bit-counting instructions.

@@ -1010,6 +1039,14 @@ ebb0:
    ; asm: setbe %dl
    [-,%rdx]            v319 = icmp ule v2, v3  ; bin: 44 39 d6 0f 96 c2

+    ; asm: cmpl $37, %ecx
+    ; asm: setl %bl
+    [-,%rbx]            v320 = icmp_imm slt v1, 37  ; bin: 83 f9 25 0f 9c c3
+
+    ; asm: cmpl $100000, %ecx
+    ; asm: setl %bl
+    [-,%rbx]            v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3
+
    ; Bool-to-int conversions.

    ; asm: movzbl %bl, %ecx
@@ -1039,19 +1076,32 @@ ebb0:
    regfill v1, ss1 -> %rcx                     ; bin: 8b 8c 24 00000408

    ; asm: cmpl %esi, %ecx
-    [-,%eflags]         v520 = ifcmp v1, v2      ; bin: 39 f1
+    [-,%rflags]         v520 = ifcmp v1, v2      ; bin: 39 f1
    ; asm: cmpl %r10d, %esi
-    [-,%eflags]         v521 = ifcmp v2, v3      ; bin: 44 39 d6
+    [-,%rflags]         v521 = ifcmp v2, v3      ; bin: 44 39 d6

    ; asm: cmpl $-100, %ecx
-    [-,%eflags]         v522 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
+    [-,%rflags]         v522 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
    ; asm: cmpl $100, %r10d
-    [-,%eflags]         v523 = ifcmp_imm v3, 100    ; bin: 41 83 fa 64
+    [-,%rflags]         v523 = ifcmp_imm v3, 100    ; bin: 41 83 fa 64

    ; asm: cmpl $-10000, %ecx
-    [-,%eflags]         v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
+    [-,%rflags]         v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
    ; asm: cmpl $10000, %r10d
-    [-,%eflags]         v525 = ifcmp_imm v3, 10000  ; bin: 41 81 fa 00002710
+    [-,%rflags]         v525 = ifcmp_imm v3, 10000  ; bin: 41 81 fa 00002710
+
+    ; asm: shll $2, %esi
+    [-,%rsi]             v526 = ishl_imm v2, 2    ; bin: c1 e6 02
+    ; asm: shll $12, %r10d
+    [-,%r10]             v527 = ishl_imm v3, 12   ; bin: 41 c1 e2 0c
+    ; asm: sarl $5, %esi
+    [-,%rsi]             v529 = sshr_imm v2, 5    ; bin: c1 fe 05
+    ; asm: sarl $32, %r10d
+    [-,%r10]             v530 = sshr_imm v3, 32   ; bin: 41 c1 fa 20
+    ; asm: shrl $8, %esi
+    [-,%rsi]             v532 = ushr_imm v2, 8    ; bin: c1 ee 08
+    ; asm: shrl $31, %r10d
+    [-,%r10]             v533 = ushr_imm v3, 31   ; bin: 41 c1 ea 1f

    ; asm: testl %ecx, %ecx
    ; asm: je ebb1x
@@ -1082,6 +1132,7 @@ ebb1:
    ; asm: ebb2x:
 ebb2:
    jump ebb1                                   ; bin: eb fd
+
 }

 ; Tests for i32/i8 conversion instructions.
@@ -1109,7 +1160,7 @@ ebb0:
    ; asm: movzbl %r10b, %ecx
    [-,%rcx]            v32 = uextend.i32 v13           ; bin: 41 0f b6 ca

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i32/i16 conversion instructions.
@@ -1137,7 +1188,7 @@ ebb0:
    ; asm: movzwl %r10w, %ecx
    [-,%rcx]            v32 = uextend.i32 v13           ; bin: 41 0f b7 ca

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i64/i8 conversion instructions.
@@ -1165,7 +1216,7 @@ ebb0:
    ; asm: movzbl %r10b, %ecx
    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 41 0f b6 ca

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i64/i16 conversion instructions.
@@ -1193,7 +1244,7 @@ ebb0:
    ; asm: movzwl %r10w, %ecx
    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 41 0f b7 ca

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i64/i32 conversion instructions.
@@ -1221,5 +1272,5 @@ ebb0:
    ; asm: movl %r10d, %ecx
    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 44 89 d1

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }
--- a/cranelift/filetests/isa/intel/legalize-div-traps.cton
+++ b/cranelift/filetests/isa/intel/legalize-div-traps.cton
@@ -40,7 +40,7 @@ ebb0(v0: i64, v1: i64):
    ; nextln: brif eq $fm1, $(m1=$EBB)
    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
    ; nextln: trapif eq $fz, int_divz
-    ; check: $(hi=$V) = sshr
+    ; check: $(hi=$V) = sshr_imm
    ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
    ; nextln: jump $(done=$EBB)($q)
    ; check: $m1:
@@ -60,7 +60,7 @@ ebb0(v0: i64, v1: i64):
    v2 = srem v0, v1
    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
    ; nextln: brif eq $fm1, $(m1=$EBB)
-    ; check: $(hi=$V) = sshr
+    ; check: $(hi=$V) = sshr_imm
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
    ; nextln: jump $(done=$EBB)($r)
    ; check: $m1:
--- a/cranelift/filetests/isa/intel/legalize-div.cton
+++ b/cranelift/filetests/isa/intel/legalize-div.cton
@@ -32,7 +32,7 @@ function %sdiv(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: ebb0(
    v2 = sdiv v0, v1
-    ; check: $(hi=$V) = sshr
+    ; check: $(hi=$V) = sshr_imm
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
    return v2
    ; nextln: return $d
@@ -46,7 +46,7 @@ ebb0(v0: i64, v1: i64):
    v2 = srem v0, v1
    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
    ; nextln: brif eq $fm1, $(m1=$EBB)
-    ; check: $(hi=$V) = sshr
+    ; check: $(hi=$V) = sshr_imm
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
    ; nextln: jump $(done=$EBB)($r)
    ; check: $m1:
--- a/cranelift/filetests/isa/intel/legalize-libcall.cton
+++ b/cranelift/filetests/isa/intel/legalize-libcall.cton
@@ -9,7 +9,7 @@ ebb0(v0: f32):
    v1 = floor v0
    return v1
 }
-; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] native {
-; check: sig0 = (f32) -> f32 native
+; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] system_v {
+; check: sig0 = (f32) -> f32 system_v
 ; check: fn0 = sig0 %FloorF32
 ; check: v1 = call fn0(v0)
--- a/cranelift/filetests/isa/intel/legalize-memory.cton
+++ b/cranelift/filetests/isa/intel/legalize-memory.cton
@@ -23,7 +23,7 @@ function %deref(i64 vmctx) -> i64 {
 ebb1(v1: i64):
    v2 = global_addr.i64 gv2
    ; check: $(a1=$V) = iadd_imm v1, -16
-    ; check: $(p1=$V) = load.i64 $a1
+    ; check: $(p1=$V) = load.i64 notrap aligned $a1
    ; check: v2 = iadd_imm $p1, 32
    return v2
    ; check: return v2
@@ -55,7 +55,7 @@ ebb0(v0: i32, v999: i64):
    ; Checks here are assuming that no peephole opts fold the load offsets.
    ; nextln: $(xoff=$V) = uextend.i64 v0
    ; nextln: $(haddr=$V) = iadd_imm v999, 64
-    ; nextln: $(hbase=$V) = load.i64 $haddr
+    ; nextln: $(hbase=$V) = load.i64 notrap aligned $haddr
    ; nextln: v1 = iadd $hbase, $xoff
    v2 = load.f32 v1+16
    ; nextln: v2 = load.f32 v1+16
@@ -103,7 +103,7 @@ ebb0(v0: i32, v999: i64):
    ; Checks here are assuming that no peephole opts fold the load offsets.
    ; nextln: $(xoff=$V) = uextend.i64 v0
    ; nextln: $(haddr=$V) = iadd_imm.i64 v999, 64
-    ; nextln: $(hbase=$V) = load.i64 $haddr
+    ; nextln: $(hbase=$V) = load.i64 notrap aligned $haddr
    ; nextln: v1 = iadd $hbase, $xoff
    v2 = load.f32 v1+0x7fff_ffff
    ; nextln: v2 = load.f32 v1+0x7fff_ffff
--- a/cranelift/filetests/isa/intel/prologue-epilogue.cton
+++ b/cranelift/filetests/isa/intel/prologue-epilogue.cton
@@ -9,7 +9,7 @@ ebb0:
    return
 }

-; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] native {
+; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
 ; nextln:     ss0 = explicit_slot 168, offset -224
 ; nextln:     ss1 = incoming_arg 56, offset -56
 ; check: ebb0(v0: i64 [%rbp], v1: i64 [%rbx], v2: i64 [%r12], v3: i64 [%r13], v4: i64 [%r14], v5: i64 [%r15]):
--- a/cranelift/filetests/isa/riscv/abi-e.cton
+++ b/cranelift/filetests/isa/riscv/abi-e.cton
@@ -7,8 +7,8 @@ isa riscv enable_e
 function %f() {
    ; Spilling into the stack args after %x15 since %x16 and up are not
    ; available in RV32E.
-    sig0 = (i64, i64, i64, i64) -> i64 native
-    ; check: sig0 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [0], i32 [4]) -> i32 [%x10], i32 [%x11] native
+    sig0 = (i64, i64, i64, i64) -> i64 system_v
+    ; check: sig0 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [0], i32 [4]) -> i32 [%x10], i32 [%x11] system_v
 ebb0:
    return
 }
--- a/cranelift/filetests/isa/riscv/abi.cton
+++ b/cranelift/filetests/isa/riscv/abi.cton
@@ -5,27 +5,27 @@ isa riscv
 ; regex: V=v\d+

 function %f() {
-    sig0 = (i32) -> i32 native
-    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] native
+    sig0 = (i32) -> i32 system_v
+    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v

-    sig1 = (i64) -> b1 native
-    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
+    sig1 = (i64) -> b1 system_v
+    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v

    ; The i64 argument must go in an even-odd register pair.
-    sig2 = (f32, i64) -> f64 native
-    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
+    sig2 = (f32, i64) -> f64 system_v
+    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v

    ; Spilling into the stack args.
-    sig3 = (f64, f64, f64, f64, f64, f64, f64, i64) -> f64 native
-    ; check: sig3 = (f64 [%f10], f64 [%f11], f64 [%f12], f64 [%f13], f64 [%f14], f64 [%f15], f64 [%f16], i32 [0], i32 [4]) -> f64 [%f10] native
+    sig3 = (f64, f64, f64, f64, f64, f64, f64, i64) -> f64 system_v
+    ; check: sig3 = (f64 [%f10], f64 [%f11], f64 [%f12], f64 [%f13], f64 [%f14], f64 [%f15], f64 [%f16], i32 [0], i32 [4]) -> f64 [%f10] system_v

    ; Splitting vectors.
-    sig4 = (i32x4) native
-    ; check: sig4 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13]) native
+    sig4 = (i32x4) system_v
+    ; check: sig4 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13]) system_v

    ; Splitting vectors, then splitting ints.
-    sig5 = (i64x4) native
-    ; check: sig5 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [%x16], i32 [%x17]) native
+    sig5 = (i64x4) system_v
+    ; check: sig5 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [%x16], i32 [%x17]) system_v

 ebb0:
    return
--- a/cranelift/filetests/isa/riscv/legalize-abi.cton
+++ b/cranelift/filetests/isa/riscv/legalize-abi.cton
@@ -106,7 +106,7 @@ ebb0(v0: i64x4):
 }

 function %indirect(i32) {
-    sig1 = () native
+    sig1 = () system_v
 ebb0(v0: i32):
    call_indirect sig1, v0()
    return
@@ -114,7 +114,7 @@ ebb0(v0: i32):

 ; The first argument to call_indirect doesn't get altered.
 function %indirect_arg(i32, f32x2) {
-    sig1 = (f32x2) native
+    sig1 = (f32x2) system_v
 ebb0(v0: i32, v1: f32x2):
    call_indirect sig1, v0(v1)
    ; check: call_indirect sig1, v0($V, $V)
--- a/cranelift/filetests/isa/riscv/parse-encoding.cton
+++ b/cranelift/filetests/isa/riscv/parse-encoding.cton
@@ -3,32 +3,32 @@ test legalizer
 isa riscv

 function %parse_encoding(i32 [%x5]) -> i32 [%x10] {
-    ; check: function %parse_encoding(i32 [%x5], i32 link [%x1]) -> i32 [%x10], i32 link [%x1] native {
+    ; check: function %parse_encoding(i32 [%x5], i32 link [%x1]) -> i32 [%x10], i32 link [%x1] system_v {

-    sig0 = (i32 [%x10]) -> i32 [%x10] native
-    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] native
+    sig0 = (i32 [%x10]) -> i32 [%x10] system_v
+    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v

-    sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
-    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
+    sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v
+    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v

-    sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
-    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
+    sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v
+    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v

    ; Arguments on stack where not necessary
-    sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] native
-    ; check: sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] native
+    sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v
+    ; check: sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v

    ; Stack argument before register argument
-    sig4 = (f32 [72], i32 [%x10]) native
-    ; check: sig4 = (f32 [72], i32 [%x10]) native
+    sig4 = (f32 [72], i32 [%x10]) system_v
+    ; check: sig4 = (f32 [72], i32 [%x10]) system_v

    ; Return value on stack
-    sig5 = () -> f32 [0] native
-    ; check: sig5 = () -> f32 [0] native
+    sig5 = () -> f32 [0] system_v
+    ; check: sig5 = () -> f32 [0] system_v

    ; function + signature
-    fn0 = function %bar(i32 [%x10]) -> b1 [%x10] native
-    ; check: sig6 = (i32 [%x10]) -> b1 [%x10] native
+    fn0 = function %bar(i32 [%x10]) -> b1 [%x10] system_v
+    ; check: sig6 = (i32 [%x10]) -> b1 [%x10] system_v
    ; nextln: fn0 = sig6 %bar

 ebb0(v0: i32):
--- a/cranelift/filetests/licm/complex.cton
+++ b/cranelift/filetests/licm/complex.cton
@@ -1,6 +1,6 @@
 test licm

-function %complex(i32) -> i32 native {
+function %complex(i32) -> i32 system_v {
 ebb0(v0: i32):
    jump ebb1(v0)

--- a/cranelift/filetests/licm/reject.cton
+++ b/cranelift/filetests/licm/reject.cton
@@ -0,0 +1,81 @@
+test licm
+
+function %other_side_effects(i32) -> i32 {
+
+ebb0(v0: i32):
+    jump ebb1(v0)
+
+ebb1(v1: i32):
+    regmove.i32 v0, %10 -> %20
+; check: ebb1(v1: i32):
+; check: regmove.i32 v0, %10 -> %20
+    v2 = iconst.i32 1
+    brz v1, ebb2(v1)
+    v5 = isub v1, v2
+    jump ebb1(v5)
+
+ebb2(v6: i32):
+    return v6
+
+}
+
+function %cpu_flags(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    jump ebb1(v0, v1)
+
+ebb1(v2: i32, v3: i32):
+    v4 = ifcmp.i32 v0, v1
+    v5 = selectif.i32 eq v4, v2, v3
+; check: ebb1(v2: i32, v3: i32):
+; check: ifcmp.i32 v0, v1
+; check: v5 = selectif.i32 eq v4, v2, v3
+    v8 = iconst.i32 1
+    brz v1, ebb2(v1)
+    v9 = isub v1, v8
+    v10 = iadd v1, v8
+    jump ebb1(v9, v10)
+
+ebb2(v6: i32):
+    return v6
+}
+
+function %spill(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v2 = spill.i32 v0
+    jump ebb1(v0, v1)
+
+ebb1(v3: i32, v4: i32):
+    v5 = spill.i32 v1
+    v6 = fill.i32 v2
+    v7 = fill.i32 v5
+; check: ebb1(v3: i32, v4: i32):
+; check: v5 = spill.i32 v1
+; check: v6 = fill.i32 v2
+; check: v7 = fill v5
+    brz v1, ebb2(v1)
+    v9 = isub v1, v4
+    jump ebb1(v9, v3)
+
+ebb2(v10: i32):
+    return v10
+}
+
+function %non_invariant_aliases(i32) -> i32 {
+
+ebb0(v0: i32):
+    jump ebb1(v0)
+
+ebb1(v1: i32):
+    v8 -> v1
+    v9 -> v1
+    v2 = iadd v8, v9
+; check: ebb1(v1: i32):
+; check: v2 = iadd v8, v9
+    brz v1, ebb2(v1)
+    v5 = isub v1, v2
+    jump ebb1(v5)
+
+ebb2(v6: i32):
+    return v6
+
+}
--- a/cranelift/filetests/parser/branch.cton
+++ b/cranelift/filetests/parser/branch.cton
@@ -9,7 +9,7 @@ ebb0:
 ebb1:
    jump ebb0()
 }
-; sameln: function %minimal() native {
+; sameln: function %minimal() system_v {
 ; nextln: ebb0:
 ; nextln:     jump ebb1
 ; nextln: 
@@ -25,7 +25,7 @@ ebb0(v90: i32):
 ebb1(v91: i32):
    jump ebb0(v91)
 }
-; sameln: function %onearg(i32) native {
+; sameln: function %onearg(i32) system_v {
 ; nextln: ebb0(v90: i32):
 ; nextln:     jump ebb1(v90)
 ; nextln: 
@@ -41,7 +41,7 @@ ebb0(v90: i32, v91: f32):
 ebb1(v92: i32, v93: f32):
    jump ebb0(v92, v93)
 }
-; sameln: function %twoargs(i32, f32) native {
+; sameln: function %twoargs(i32, f32) system_v {
 ; nextln: ebb0(v90: i32, v91: f32):
 ; nextln:     jump ebb1(v90, v91)
 ; nextln: 
@@ -57,7 +57,7 @@ ebb0(v90: i32):
 ebb1:
    brnz v90, ebb1()
 }
-; sameln: function %minimal(i32) native {
+; sameln: function %minimal(i32) system_v {
 ; nextln: ebb0(v90: i32):
 ; nextln:     brz v90, ebb1
 ; nextln: 
@@ -72,7 +72,7 @@ ebb0(v90: i32, v91: f32):
 ebb1(v92: i32, v93: f32):
    brnz v90, ebb0(v92, v93)
 }
-; sameln: function %twoargs(i32, f32) native {
+; sameln: function %twoargs(i32, f32) system_v {
 ; nextln: ebb0(v90: i32, v91: f32):
 ; nextln:     brz v90, ebb1(v90, v91)
 ; nextln: 
@@ -94,7 +94,7 @@ ebb30:
 ebb40:
    trap user4
 }
-; sameln: function %jumptable(i32) native {
+; sameln: function %jumptable(i32) system_v {
 ; check:      jt2 = jump_table 0, 0, ebb10, ebb40, ebb20, ebb30
 ; check:      jt200 = jump_table 0
 ; check:  ebb10(v3: i32):
--- a/cranelift/filetests/parser/call.cton
+++ b/cranelift/filetests/parser/call.cton
@@ -5,7 +5,7 @@ function %mini() {
 ebb1:
    return
 }
-; sameln: function %mini() native {
+; sameln: function %mini() system_v {
 ; nextln: ebb1:
 ; nextln:     return
 ; nextln: }
@@ -29,10 +29,10 @@ function %signatures() {
    fn5 = sig11 %foo
    fn8 = function %bar(i32) -> b1
 }
-; sameln: function %signatures() native {
-; check:      sig10 = () native
+; sameln: function %signatures() system_v {
+; check:      sig10 = () system_v
 ; check:      sig11 = (i32, f64) -> i32, b1 spiderwasm
-; check:      sig12 = (i32) -> b1 native
+; check:      sig12 = (i32) -> b1 system_v
 ; not:        fn0
 ; check:      fn5 = sig11 %foo
 ; check:      fn8 = sig12 %bar
@@ -88,7 +88,7 @@ function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32
 ebb0(v1: i32, v2: i32, v3: i32, v4: i32):
    return v4, v2, v3, v1
 }
-; check: function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret native {
+; check: function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret system_v {
 ; check: ebb0(v1: i32, v2: i32, v3: i32, v4: i32):
 ; check:     return v4, v2, v3, v1
 ; check: }
--- a/cranelift/filetests/parser/instruction_encoding.cton
+++ b/cranelift/filetests/parser/instruction_encoding.cton
@@ -13,7 +13,7 @@ ebb1(v0: i32 [%x8], v1: i32):
@55 v9 = iadd v8, v7
@a5 [Iret#5] return v0, v8
 }
-; sameln: function %foo(i32, i32) native {
+; sameln: function %foo(i32, i32) system_v {
 ; nextln: ebb1(v0: i32 [%x8], v1: i32):
 ; nextln:     [-,-]$WS v2 = iadd v0, v1
 ; nextln:     [-]$WS trap heap_oob
--- a/cranelift/filetests/parser/keywords.cton
+++ b/cranelift/filetests/parser/keywords.cton
@@ -2,4 +2,4 @@ test cat

 ; 'function' is not a keyword, and can be used as the name of a function too.
 function %function() {}
-; check: function %function() native
+; check: function %function() system_v
--- a/cranelift/filetests/parser/rewrite.cton
+++ b/cranelift/filetests/parser/rewrite.cton
@@ -9,7 +9,7 @@ ebb100(v20: i32):
    v9200 = f64const 0x4.0p0
    trap user4
 }
-; sameln: function %defs() native {
+; sameln: function %defs() system_v {
 ; nextln: ebb100(v20: i32):
 ; nextln:     v1000 = iconst.i32x8 5
 ; nextln:     v9200 = f64const 0x1.0000000000000p2
@@ -23,7 +23,7 @@ ebb100(v20: i32):
    v200 = iadd v20, v1000
    jump ebb100(v1000)
 }
-; sameln: function %use_value() native {
+; sameln: function %use_value() system_v {
 ; nextln: ebb100(v20: i32):
 ; nextln:     v1000 = iadd_imm v20, 5
 ; nextln:     v200 = iadd v20, v1000
--- a/cranelift/filetests/parser/tiny.cton
+++ b/cranelift/filetests/parser/tiny.cton
@@ -5,7 +5,7 @@ function %minimal() {
 ebb0:
    trap user0
 }
-; sameln: function %minimal() native {
+; sameln: function %minimal() system_v {
 ; nextln: ebb0:
 ; nextln:     trap user0
 ; nextln: }
@@ -18,7 +18,7 @@ ebb0:
    v1 = iconst.i8 6
    v2 = ishl v0, v1
 }
-; sameln: function %ivalues() native {
+; sameln: function %ivalues() system_v {
 ; nextln: ebb0:
 ; nextln:     v0 = iconst.i32 2
 ; nextln:     v1 = iconst.i8 6
@@ -34,7 +34,7 @@ ebb0:
    v2 = bextend.b32 v1
    v3 = bxor v0, v2
 }
-; sameln: function %bvalues() native {
+; sameln: function %bvalues() system_v {
 ; nextln: ebb0:
 ; nextln:     v0 = bconst.b32 true
 ; nextln:     v1 = bconst.b8 false
@@ -47,17 +47,17 @@ function %select() {
 ebb0(v90: i32, v91: i32, v92: b1):
    v0 = select v92, v90, v91
 }
-; sameln: function %select() native {
+; sameln: function %select() system_v {
 ; nextln: ebb0(v90: i32, v91: i32, v92: b1):
 ; nextln:     v0 = select v92, v90, v91
 ; nextln: }

 ; Polymorphic instruction controlled by third operand.
-function %selectif() native {
+function %selectif() system_v {
 ebb0(v95: i32, v96: i32, v97: b1):
    v98 = selectif.i32 eq v97, v95, v96
 }
-; sameln: function %selectif() native {
+; sameln: function %selectif() system_v {
 ; nextln: ebb0(v95: i32, v96: i32, v97: b1):
 ; nextln: v98 = selectif.i32 eq v97, v95, v96
 ; nextln: }
@@ -69,7 +69,7 @@ ebb0:
    v1 = extractlane v0, 3
    v2 = insertlane v0, 1, v1
 }
-; sameln: function %lanes() native {
+; sameln: function %lanes() system_v {
 ; nextln: ebb0:
 ; nextln:     v0 = iconst.i32x4 2
 ; nextln:     v1 = extractlane v0, 3
@@ -85,7 +85,7 @@ ebb0(v90: i32, v91: i32):
    v3 = irsub_imm v91, 45
    br_icmp eq v90, v91, ebb0(v91, v90)
 }
-; sameln: function %icmp(i32, i32) native {
+; sameln: function %icmp(i32, i32) system_v {
 ; nextln: ebb0(v90: i32, v91: i32):
 ; nextln:     v0 = icmp eq v90, v91
 ; nextln:     v1 = icmp ult v90, v91
@@ -101,7 +101,7 @@ ebb0(v90: f32, v91: f32):
    v1 = fcmp uno v90, v91
    v2 = fcmp lt v90, v91
 }
-; sameln: function %fcmp(f32, f32) native {
+; sameln: function %fcmp(f32, f32) system_v {
 ; nextln: ebb0(v90: f32, v91: f32):
 ; nextln:     v0 = fcmp eq v90, v91
 ; nextln:     v1 = fcmp uno v90, v91
@@ -115,7 +115,7 @@ ebb0(v90: i32, v91: f32):
    v0 = bitcast.i8x4 v90
    v1 = bitcast.i32 v91
 }
-; sameln: function %bitcast(i32, f32) native {
+; sameln: function %bitcast(i32, f32) system_v {
 ; nextln: ebb0(v90: i32, v91: f32):
 ; nextln:     v0 = bitcast.i8x4 v90
 ; nextln:     v1 = bitcast.i32 v91
@@ -135,7 +135,7 @@ ebb0:
    stack_store v1, ss10+2
    stack_store v2, ss2
 }
-; sameln: function %stack() native {
+; sameln: function %stack() system_v {
 ; check:     ss2 = explicit_slot 4
 ; check:     ss3 = incoming_arg 4, offset 8
 ; check:     ss4 = outgoing_arg 4
@@ -162,7 +162,7 @@ ebb0(v1: i32):
    store aligned v3, v1+12
    store notrap aligned v3, v1-12
 }
-; sameln: function %memory(i32) native {
+; sameln: function %memory(i32) system_v {
 ; nextln: ebb0(v1: i32):
 ; nextln:     v2 = load.i64 v1
 ; nextln:     v3 = load.i64 aligned v1
@@ -187,7 +187,7 @@ ebb0(v1: i32):
    regfill v1, ss0 -> %10
    return
 }
-; sameln: function %diversion(i32) native {
+; sameln: function %diversion(i32) system_v {
 ; nextln:     ss0 = spill_slot 4
 ; check: ebb0(v1: i32):
 ; nextln:     regmove v1, %10 -> %20
@@ -204,7 +204,7 @@ ebb0:
    copy_special %20 -> %10
    return
 }
-; sameln: function %copy_special() native {
+; sameln: function %copy_special() system_v {
 ; nextln: ebb0:
 ; nextln:     copy_special %10 -> %20
 ; nextln:     copy_special %20 -> %10
--- a/cranelift/filetests/postopt/basic.cton
+++ b/cranelift/filetests/postopt/basic.cton
@@ -0,0 +1,100 @@
+test postopt
+isa intel
+
+; Test that compare+branch sequences are folded effectively on x86.
+
+function %br_icmp(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+[Op1icscc#39,%rdx]  v2 = icmp slt v0, v1
+[Op1t8jccd_long#85] brnz v2, ebb1
+[Op1ret#c3]         return v1
+
+ebb1:
+[Op1puid#b8,%rax]   v8 = iconst.i32 3
+[Op1ret#c3]         return v8
+}
+; sameln: function %br_icmp
+; nextln: ebb0(v0: i32, v1: i32):
+; nextln:    v9 = ifcmp v0, v1
+; nextln:    v2 = trueif slt v9
+; nextln:    brif slt v9, ebb1
+; nextln:    return v1
+; nextln: 
+; nextln: ebb1:
+; nextln:    v8 = iconst.i32 3
+; nextln:    return v8
+; nextln: }
+
+; Use brz instead of brnz, so the condition is inverted.
+
+function %br_icmp_inverse(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+[Op1icscc#39,%rdx]  v2 = icmp slt v0, v1
+[Op1t8jccd_long#84] brz v2, ebb1
+[Op1ret#c3]         return v1
+
+ebb1:
+[Op1puid#b8,%rax]   v8 = iconst.i32 3
+[Op1ret#c3]         return v8
+}
+; sameln: function %br_icmp_inverse
+; nextln: ebb0(v0: i32, v1: i32):
+; nextln:    v9 = ifcmp v0, v1
+; nextln:    v2 = trueif slt v9
+; nextln:    brif sge v9, ebb1
+; nextln:    return v1
+; nextln: 
+; nextln: ebb1:
+; nextln:    v8 = iconst.i32 3
+; nextln:    return v8
+; nextln: }
+
+; Use icmp_imm instead of icmp.
+
+function %br_icmp_imm(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+[Op1icsccib#7083]   v2 = icmp_imm slt v0, 2
+[Op1t8jccd_long#84] brz v2, ebb1
+[Op1ret#c3]         return v1
+
+ebb1:
+[Op1puid#b8,%rax]   v8 = iconst.i32 3
+[Op1ret#c3]         return v8
+}
+; sameln: function %br_icmp_imm
+; nextln: ebb0(v0: i32, v1: i32):
+; nextln:    v9 = ifcmp_imm v0, 2
+; nextln:    v2 = trueif slt v9
+; nextln:    brif sge v9, ebb1
+; nextln:    return v1
+; nextln: 
+; nextln: ebb1:
+; nextln:    v8 = iconst.i32 3
+; nextln:    return v8
+; nextln: }
+
+; Use fcmp instead of icmp.
+
+function %br_fcmp(f32, f32) -> f32 {
+ebb0(v0: f32, v1: f32):
+[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1
+[Op1t8jccd_long#84] brz v2, ebb1
+[Op1ret#c3]         return v1
+
+ebb1:
+[Op1puid#b8,%rax]    v18 = iconst.i32 0x40a8_0000
+[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18
+[Op1ret#c3]         return v8
+}
+; sameln: function %br_fcmp
+; nextln: ebb0(v0: f32, v1: f32):
+; nextln:    v19 = ffcmp v0, v1
+; nextln:    v2 = trueff gt v19
+; nextln:    brff ule v19, ebb1
+; nextln:    return v1
+; nextln: 
+; nextln: ebb1:
+; nextln:    v18 = iconst.i32 0x40a8_0000
+; nextln:    v8 = bitcast.f32 v18
+; nextln:    return v8
+; nextln: }
--- a/cranelift/filetests/preopt/simplify.cton
+++ b/cranelift/filetests/preopt/simplify.cton
@@ -0,0 +1,80 @@
+test preopt
+isa intel
+
+function %iadd_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = iadd v0, v1
+    return v2
+}
+; sameln: function %iadd_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = iadd_imm v0, 2
+; nextln:     return v2
+; nextln: }
+
+function %isub_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = isub v0, v1
+    return v2
+}
+; sameln: function %isub_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = iadd_imm v0, -2
+; nextln:     return v2
+; nextln: }
+
+function %icmp_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = icmp slt v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+; sameln: function %icmp_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = icmp_imm slt v0, 2
+; nextln:     v3 = bint.i32 v2
+; nextln:     return v3
+; nextln: }
+
+function %brz_bint(i32) {
+ebb0(v0: i32):
+    v3 = icmp_imm slt v0, 0
+    v1 = bint.i32 v3
+    v2 = select v1, v1, v1
+    trapz v1, user0
+    brz v1, ebb1
+    jump ebb2
+
+ebb1:
+    return
+
+ebb2:
+    return
+}
+; sameln: function %brz_bint
+; nextln: (v0: i32):
+; nextln:    v3 = icmp_imm slt v0, 0
+; nextln:    v1 = bint.i32 v3
+; nextln:    v2 = select v3, v1, v1
+; nextln:    trapz v3, user0
+; nextln:    brz v3, ebb1
+; nextln:    jump ebb2
+
+function %irsub_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = isub v1, v0
+    return v2
+}
+; sameln: function %irsub_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = irsub_imm v1, 2
+; nextln:     return v2
+; nextln: }
--- a/cranelift/filetests/regalloc/coalesce.cton
+++ b/cranelift/filetests/regalloc/coalesce.cton
@@ -109,7 +109,7 @@ ebb1(v10: i32):
    return v11
 }

-function %gvn_unremovable_phi(i32) native {
+function %gvn_unremovable_phi(i32) system_v {
 ebb0(v0: i32):
    v2 = iconst.i32 0
    jump ebb2(v2, v0)
--- a/cranelift/filetests/regalloc/coalescing-207.cton
+++ b/cranelift/filetests/regalloc/coalescing-207.cton
@@ -5,12 +5,12 @@ isa intel haswell
 ; Reported as https://github.com/Cretonne/cretonne/issues/207
 ;
 ; The coalescer creates a virtual register with two interfering values.
-function %pr207(i64 vmctx, i32, i32) -> i32 native {
+function %pr207(i64 vmctx, i32, i32) -> i32 system_v {
    gv0 = vmctx-8
    heap0 = static gv0, min 0, bound 0x5000, guard 0x0040_0000
-    sig0 = (i64 vmctx, i32, i32) -> i32 native
-    sig1 = (i64 vmctx, i32, i32, i32) -> i32 native
-    sig2 = (i64 vmctx, i32, i32, i32) -> i32 native
+    sig0 = (i64 vmctx, i32, i32) -> i32 system_v
+    sig1 = (i64 vmctx, i32, i32, i32) -> i32 system_v
+    sig2 = (i64 vmctx, i32, i32, i32) -> i32 system_v
    fn0 = sig0 u0:2
    fn1 = sig1 u0:0
    fn2 = sig2 u0:1
@@ -1034,10 +1034,10 @@ ebb92(v767: i32):
 }

 ; Same problem from musl.wasm.
-function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] native {
+function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] system_v {
    gv0 = vmctx
    heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000
-    sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] native
+    sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] system_v
    fn0 = sig0 u0:517

 ebb0(v0: f64, v1: i64):
--- a/cranelift/filetests/regalloc/coalescing-216.cton
+++ b/cranelift/filetests/regalloc/coalescing-216.cton
@@ -5,7 +5,7 @@ isa intel haswell
 ; Reported as https://github.com/Cretonne/cretonne/issues/216 from the Binaryen fuzzer.
 ;
 ; The (old) coalescer creates a virtual register with two identical values.
-function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] native {
+function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v {
 ebb0(v0: i32, v1: i64):
    v3 = iconst.i64 0
    v5 = iconst.i32 0
--- a/cranelift/filetests/regalloc/coloring-227.cton
+++ b/cranelift/filetests/regalloc/coloring-227.cton
@@ -2,7 +2,7 @@ test regalloc
 set is_64bit
 isa intel haswell

-function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) native {
+function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) system_v {
    gv0 = vmctx
    heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000

@@ -21,7 +21,7 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@0011 [RexOp1puid#b8]               v9 = iconst.i32 0
@0015 [RexOp1puid#b8]               v11 = iconst.i32 0
@0017 [RexOp1icscc#39]              v12 = icmp.i32 eq v15, v11
-@0017 [RexOp2urm#4b6]               v13 = bint.i32 v12
+@0017 [RexOp2urm_noflags#4b6]       v13 = bint.i32 v12
@001a [RexOp1rr#21]                 v14 = band v9, v13
@001b [RexOp1tjccb#75]              brnz v14, ebb6
@001d [RexOp1jmpb#eb]               jump ebb7
--- a/cranelift/filetests/regalloc/ghost-param.cton
+++ b/cranelift/filetests/regalloc/ghost-param.cton
@@ -9,7 +9,7 @@ isa intel haswell
 ;
 ; Test case by binaryen fuzzer!

-function %pr215(i64 vmctx [%rdi]) native {
+function %pr215(i64 vmctx [%rdi]) system_v {
 ebb0(v0: i64):
    v10 = iconst.i64 0
    v1 = bitcast.f64 v10
--- a/cranelift/filetests/regalloc/global-fixed.cton
+++ b/cranelift/filetests/regalloc/global-fixed.cton
@@ -2,7 +2,7 @@ test regalloc
 set is_64bit=1
 isa intel haswell

-function %foo() native {
+function %foo() system_v {
 ebb4:
    v3 = iconst.i32 0
    jump ebb3
--- a/cranelift/filetests/regalloc/intel-regres.cton
+++ b/cranelift/filetests/regalloc/intel-regres.cton
@@ -11,7 +11,7 @@ isa intel
 ; This ended up confusing the constraint solver which had not made a record of
 ; the fixed register assignment for v9 since it was already in the correct
 ; register.
-function %pr147(i32) -> i32 native {
+function %pr147(i32) -> i32 system_v {
 ebb0(v0: i32):
    v1 = iconst.i32 0
    v2 = iconst.i32 1
--- a/cranelift/filetests/regalloc/output-interference.cton
+++ b/cranelift/filetests/regalloc/output-interference.cton
@@ -2,7 +2,7 @@ test regalloc
 set is_64bit=1
 isa intel haswell

-function %test(i64) -> i64 native {
+function %test(i64) -> i64 system_v {
 ebb0(v0: i64):
    v2 = iconst.i64 12
    ; This division clobbers two of its fixed input registers on Intel.
--- a/cranelift/filetests/regalloc/reload-208.cton
+++ b/cranelift/filetests/regalloc/reload-208.cton
@@ -11,11 +11,11 @@ isa intel haswell
 ;
 ; The problem was the reload pass rewriting EBB arguments on "brnz v9, ebb3(v9)"

-function %pr208(i64 vmctx [%rdi]) native {
+function %pr208(i64 vmctx [%rdi]) system_v {
    gv0 = vmctx-8
    heap0 = static gv0, min 0, bound 0x5000, guard 0x0040_0000
-    sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] native
-    sig1 = (i64 vmctx [%rdi], i32 [%rsi]) native
+    sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v
+    sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v
    fn0 = sig0 u0:1
    fn1 = sig1 u0:3

--- a/cranelift/filetests/regalloc/reload.cton
+++ b/cranelift/filetests/regalloc/reload.cton
@@ -5,7 +5,7 @@ isa riscv enable_e

 ; Check that we can handle a function return value that got spilled.
 function %spill_return() -> i32 {
-    fn0 = function %foo() -> i32 native
+    fn0 = function %foo() -> i32 system_v

 ebb0:
    v0 = call fn0()
--- a/cranelift/filetests/regalloc/schedule-moves.cton
+++ b/cranelift/filetests/regalloc/schedule-moves.cton
@@ -1,7 +1,7 @@
 test regalloc
 isa intel haswell

-function %pr165() native {
+function %pr165() system_v {
 ebb0:
    v0 = iconst.i32 0x0102_0304
    v1 = iconst.i32 0x1102_0304
@@ -19,7 +19,7 @@ ebb0:

 ; Same as above, but use so many registers that spilling is required.
 ; Note: This is also a candidate for using xchg instructions.
-function %emergency_spill() native {
+function %emergency_spill() system_v {
 ebb0:
    v0 = iconst.i32 0x0102_0304
    v1 = iconst.i32 0x1102_0304
--- a/cranelift/filetests/regalloc/spill-noregs.cton
+++ b/cranelift/filetests/regalloc/spill-noregs.cton
@@ -13,7 +13,7 @@ isa intel
 ;
 ; The spiller was not releasing register pressure for dead EBB parameters.

-function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] native {
+function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v {
 ebb0(v0: i32, v1: i64):
    v2 = iconst.i32 0
    v3 = iconst.i64 0
--- a/cranelift/filetests/regalloc/spill.cton
+++ b/cranelift/filetests/regalloc/spill.cton
@@ -93,7 +93,7 @@ ebb0(v0: i32):

 ; The same value used as indirect callee and argument.
 function %doubleuse_icall1(i32) {
-    sig0 = (i32) native
+    sig0 = (i32) system_v
 ebb0(v0: i32):
    ; not:copy
    call_indirect sig0, v0(v0)
@@ -102,7 +102,7 @@ ebb0(v0: i32):

 ; The same value used as indirect callee and two arguments.
 function %doubleuse_icall2(i32) {
-    sig0 = (i32, i32) native
+    sig0 = (i32, i32) system_v
 ebb0(v0: i32):
    ; check: $(c=$V) = copy v0
    call_indirect sig0, v0(v0, v0)
--- a/cranelift/filetests/verifier/defs_dominates_uses.cton
+++ b/cranelift/filetests/verifier/defs_dominates_uses.cton
@@ -0,0 +1,16 @@
+test verifier
+
+; Test verification that defs properly dominate uses.
+
+function %non_dominating(i32) -> i32 system_v {
+ebb0(v0: i32):
+    v1 = iadd.i32 v2, v0   ; error: uses value from non-dominating
+    v2 = iadd.i32 v1, v0
+    return v2
+}
+
+function %inst_uses_its_own_values(i32) -> i32 system_v {
+ebb0(v0: i32):
+    v1 = iadd.i32 v1, v0   ; error: uses value from itself
+    return v1
+}
--- a/cranelift/filetests/verifier/flags.cton
+++ b/cranelift/filetests/verifier/flags.cton
@@ -6,7 +6,7 @@ function %simple(i32) -> i32 {
                    ebb0(v0: i32):
    [Op1rcmp#39]              v1 = ifcmp v0, v0
    [Op2seti_abcd#490]        v2 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v3 = bint.i32 v2
+    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
    [Op1ret#c3]               return v3
 }

@@ -18,7 +18,7 @@ function %overlap(i32, f32) -> i32 {
    [Op2setf_abcd#490]        v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3
    [Op2seti_abcd#490]        v5 = trueif ugt v2
    [Op1rr#21]                v6 = band v4, v5
-    [Op2urm_abcd#4b6]   v7 = bint.i32 v6
+    [Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6
    [Op1ret#c3]               return v7
 }

@@ -28,7 +28,7 @@ function %clobbered(i32) -> i32 {
    [Op1rcmp#39]              v1 = ifcmp v0, v0
    [Op1rr#01]                v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1
    [Op2seti_abcd#490]        v3 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v4 = bint.i32 v3
+    [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
    [Op1ret#c3]               return v4
 }

@@ -38,7 +38,7 @@ function %live_across_load(i32) -> i32 {
    [Op1rcmp#39]              v1 = ifcmp v0, v0
    [Op1ld#8b]                v2 = load.i32 v0
    [Op2seti_abcd#490]        v3 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v4 = bint.i32 v3
+    [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
    [Op1ret#c3]               return v4
 }

@@ -49,7 +49,7 @@ function %live_across_ebb(i32) -> i32 {
    [Op1jmpb#eb]              jump ebb1
                          ebb1:
    [Op2seti_abcd#490]        v2 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v3 = bint.i32 v2
+    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
    [Op1ret#c3]               return v3
 }

@@ -58,7 +58,7 @@ function %live_across_ebb_backwards(i32) -> i32 {
    [Op1jmpb#eb]              jump ebb2
                          ebb1:
    [Op2seti_abcd#490]        v2 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v3 = bint.i32 v2
+    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
    [Op1ret#c3]               return v3
                          ebb2:
    [Op1rcmp#39]              v1 = ifcmp v0, v0
--- a/cranelift/publish-all.sh
+++ b/cranelift/publish-all.sh
@@ -4,17 +4,13 @@ cd $(dirname "$0")
 topdir="$(pwd)"

 # All the cretonne-* crates have the same version number
-# The filecheck crate version is managed independently.
-version="0.3.4"
+version="0.4.1"

 # Update all of the Cargo.toml files.
 #
 # The main Cargo.toml in the top-level directory is the cretonne-tools crate which we don't publish.
 echo "Updating crate versions to $version"
 for crate in . lib/*; do
-    if [ "$crate" = "lib/filecheck" ]; then
-        continue
-    fi
    # Update the version number of this crate to $version.
    sed -i.bk -e "s/^version = .*/version = \"$version\"/" "$crate/Cargo.toml"
    # Update the required version number of any cretonne* dependencies.
@@ -31,7 +27,7 @@ cargo update

 echo git commit -a -m "\"Bump version to $version"\"
 echo git push
-for crate in filecheck cretonne frontend native reader wasm; do
+for crate in cretonne frontend native reader wasm; do
    echo cargo publish --manifest-path "lib/$crate/Cargo.toml"
 done
 echo
--- a/cranelift/src/cat.rs
+++ b/cranelift/src/cat.rs
@@ -1,16 +1,13 @@
 //! The `cat` sub-command.
 //!
-//! Read a sequence of Cretonne IL files and print them again to stdout. This has the effect of
+//! Read a sequence of Cretonne IR files and print them again to stdout. This has the effect of
 //! normalizing formatting and removing comments.

-use std::borrow::Cow;
-use cretonne::ir::Function;
-use cton_reader::{parse_functions, TestCommand};
 use CommandResult;
+use cton_reader::parse_functions;
 use utils::read_to_string;
-use filetest::subtest::{self, SubTest, Context, Result as STResult};

-pub fn run(files: Vec<String>) -> CommandResult {
+pub fn run(files: &[String]) -> CommandResult {
    for (i, f) in files.into_iter().enumerate() {
        if i != 0 {
            println!();
@@ -20,7 +17,7 @@ pub fn run(files: Vec<String>) -> CommandResult {
    Ok(())
 }

-fn cat_one(filename: String) -> CommandResult {
+fn cat_one(filename: &str) -> CommandResult {
    let buffer = read_to_string(&filename).map_err(
        |e| format!("{}: {}", filename, e),
    )?;
@@ -37,34 +34,3 @@ fn cat_one(filename: String) -> CommandResult {

    Ok(())
 }
-
-/// Object implementing the `test cat` sub-test.
-///
-/// This command is used for testing the parser and function printer. It simply parses a function
-/// and prints it out again.
-///
-/// The result is verified by filecheck.
-struct TestCat;
-
-pub fn subtest(parsed: &TestCommand) -> STResult<Box<SubTest>> {
-    assert_eq!(parsed.command, "cat");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestCat))
-    }
-}
-
-impl SubTest for TestCat {
-    fn name(&self) -> Cow<str> {
-        Cow::from("cat")
-    }
-
-    fn needs_verifier(&self) -> bool {
-        false
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> STResult<()> {
-        subtest::run_filecheck(&func.display(context.isa).to_string(), context)
-    }
-}
--- a/cranelift/src/compile.rs
+++ b/cranelift/src/compile.rs
@@ -1,14 +1,13 @@
-//! CLI tool to compile cretonne IL into native code.
-//!
-//! Reads IR files into Cretonne IL and compiles it.
+//! CLI tool to read Cretonne IR files and compile them into native code.

-use cton_reader::parse_test;
-use std::path::PathBuf;
 use cretonne::Context;
+use cretonne::print_errors::pretty_error;
 use cretonne::settings::FlagsOrIsa;
 use cretonne::{binemit, ir};
+use cton_reader::parse_test;
 use std::path::Path;
-use utils::{pretty_error, read_to_string, parse_sets_and_isa};
+use std::path::PathBuf;
+use utils::{parse_sets_and_isa, read_to_string};

 struct PrintRelocs {
    flag_print: bool,
@@ -45,26 +44,38 @@ impl binemit::RelocSink for PrintRelocs {
    }
 }

+struct PrintTraps {
+    flag_print: bool,
+}
+
+impl binemit::TrapSink for PrintTraps {
+    fn trap(&mut self, offset: binemit::CodeOffset, _srcloc: ir::SourceLoc, code: ir::TrapCode) {
+        if self.flag_print {
+            println!("trap: {} at {}", code, offset);
+        }
+    }
+}
+
 pub fn run(
    files: Vec<String>,
    flag_print: bool,
-    flag_set: Vec<String>,
-    flag_isa: String,
+    flag_set: &[String],
+    flag_isa: &str,
 ) -> Result<(), String> {
    let parsed = parse_sets_and_isa(flag_set, flag_isa)?;

    for filename in files {
        let path = Path::new(&filename);
        let name = String::from(path.as_os_str().to_string_lossy());
-        handle_module(flag_print, path.to_path_buf(), name, parsed.as_fisa())?;
+        handle_module(flag_print, &path.to_path_buf(), &name, parsed.as_fisa())?;
    }
    Ok(())
 }

 fn handle_module(
    flag_print: bool,
-    path: PathBuf,
-    name: String,
+    path: &PathBuf,
+    name: &str,
    fisa: FlagsOrIsa,
 ) -> Result<(), String> {
    let buffer = read_to_string(&path).map_err(
@@ -95,8 +106,9 @@ fn handle_module(
        // Encode the result as machine code.
        let mut mem = Vec::new();
        let mut relocs = PrintRelocs { flag_print };
+        let mut traps = PrintTraps { flag_print };
        mem.resize(size as usize, 0);
-        context.emit_to_memory(mem.as_mut_ptr(), &mut relocs, &*isa);
+        context.emit_to_memory(mem.as_mut_ptr(), &mut relocs, &mut traps, &*isa);

        if flag_print {
            print!(".byte ");
--- a/cranelift/src/cton-util.rs
+++ b/cranelift/src/cton-util.rs
@@ -1,27 +1,25 @@
-#[macro_use(dbg)]
 extern crate cretonne;
+extern crate cton_filetests;
 extern crate cton_reader;
 extern crate cton_wasm;
 extern crate docopt;
+extern crate filecheck;
 #[macro_use]
 extern crate serde_derive;
-extern crate filecheck;
-extern crate num_cpus;
 extern crate tempdir;
 extern crate term;

-use cretonne::{VERSION, timing};
+use cretonne::{timing, VERSION};
 use docopt::Docopt;
 use std::io::{self, Write};
 use std::process;

-mod utils;
-mod filetest;
 mod cat;
+mod compile;
 mod print_cfg;
 mod rsfilecheck;
+mod utils;
 mod wasm;
-mod compile;

 const USAGE: &str = "
 Cretonne code generator utility
@@ -40,12 +38,12 @@ Options:
    -T, --time-passes
                    print pass timing report
    -t, --just-decode
-                    just decode WebAssembly to Cretonne IL
+                    just decode WebAssembly to Cretonne IR
    -s, --print-size
                    prints generated code size
    -c, --check-translation
-                    just checks the correctness of Cretonne IL translated from WebAssembly
-    -p, --print     print the resulting Cretonne IL
+                    just checks the correctness of Cretonne IR translated from WebAssembly
+    -p, --print     print the resulting Cretonne IR
    -h, --help      print this help message
    --set=<set>     configure Cretonne settings
    --isa=<isa>     specify the Cretonne ISA
@@ -88,15 +86,20 @@ fn cton_util() -> CommandResult {

    // Find the sub-command to execute.
    let result = if args.cmd_test {
-        filetest::run(args.flag_verbose, args.arg_file)
+        cton_filetests::run(args.flag_verbose, &args.arg_file).map(|_time| ())
    } else if args.cmd_cat {
-        cat::run(args.arg_file)
+        cat::run(&args.arg_file)
    } else if args.cmd_filecheck {
-        rsfilecheck::run(args.arg_file, args.flag_verbose)
+        rsfilecheck::run(&args.arg_file, args.flag_verbose)
    } else if args.cmd_print_cfg {
-        print_cfg::run(args.arg_file)
+        print_cfg::run(&args.arg_file)
    } else if args.cmd_compile {
-        compile::run(args.arg_file, args.flag_print, args.flag_set, args.flag_isa)
+        compile::run(
+            args.arg_file,
+            args.flag_print,
+            &args.flag_set,
+            &args.flag_isa,
+        )
    } else if args.cmd_wasm {
        wasm::run(
            args.arg_file,
@@ -104,8 +107,8 @@ fn cton_util() -> CommandResult {
            args.flag_just_decode,
            args.flag_check_translation,
            args.flag_print,
-            args.flag_set,
-            args.flag_isa,
+            &args.flag_set,
+            &args.flag_isa,
            args.flag_print_size,
        )
    } else {
--- a/cranelift/src/filetest/mod.rs
+++ b/cranelift/src/filetest/mod.rs
@@ -1,75 +0,0 @@
-//! File tests.
-//!
-//! This module contains the main driver for `cton-util test` as well as implementations of the
-//! available test commands.
-
-use std::path::Path;
-use std::time;
-use cton_reader::TestCommand;
-use CommandResult;
-use cat;
-use print_cfg;
-use filetest::runner::TestRunner;
-
-pub mod subtest;
-
-mod binemit;
-mod compile;
-mod concurrent;
-mod domtree;
-mod legalizer;
-mod licm;
-mod preopt;
-mod regalloc;
-mod runner;
-mod runone;
-mod simple_gvn;
-mod verifier;
-
-/// The result of running the test in a file.
-pub type TestResult = Result<time::Duration, String>;
-
-/// Main entry point for `cton-util test`.
-///
-/// Take a list of filenames which can be either `.cton` files or directories.
-///
-/// Files are interpreted as test cases and executed immediately.
-///
-/// Directories are scanned recursively for test cases ending in `.cton`. These test cases are
-/// executed on background threads.
-///
-pub fn run(verbose: bool, files: Vec<String>) -> CommandResult {
-    let mut runner = TestRunner::new(verbose);
-
-    for path in files.iter().map(Path::new) {
-        if path.is_file() {
-            runner.push_test(path);
-        } else {
-            runner.push_dir(path);
-        }
-    }
-
-    runner.start_threads();
-    runner.run()
-}
-
-/// Create a new subcommand trait object to match `parsed.command`.
-///
-/// This function knows how to create all of the possible `test <foo>` commands that can appear in
-/// a `.cton` test file.
-fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
-    match parsed.command {
-        "binemit" => binemit::subtest(parsed),
-        "cat" => cat::subtest(parsed),
-        "compile" => compile::subtest(parsed),
-        "domtree" => domtree::subtest(parsed),
-        "legalizer" => legalizer::subtest(parsed),
-        "licm" => licm::subtest(parsed),
-        "preopt" => preopt::subtest(parsed),
-        "print-cfg" => print_cfg::subtest(parsed),
-        "regalloc" => regalloc::subtest(parsed),
-        "simple-gvn" => simple_gvn::subtest(parsed),
-        "verifier" => verifier::subtest(parsed),
-        _ => Err(format!("unknown test command '{}'", parsed.command)),
-    }
-}
--- a/cranelift/src/print_cfg.rs
+++ b/cranelift/src/print_cfg.rs
@@ -1,20 +1,14 @@
 //! The `print-cfg` sub-command.
 //!
-//! Read a series of Cretonne IL files and print their control flow graphs
+//! Read a series of Cretonne IR files and print their control flow graphs
 //! in graphviz format.

-use std::borrow::Cow;
-use std::fmt::{Result, Write, Display, Formatter};
-
 use CommandResult;
-use cretonne::flowgraph::ControlFlowGraph;
-use cretonne::ir::Function;
-use cretonne::ir::instructions::BranchInfo;
-use cton_reader::{parse_functions, TestCommand};
-use filetest::subtest::{self, SubTest, Context, Result as STResult};
+use cretonne::cfg_printer::CFGPrinter;
+use cton_reader::parse_functions;
 use utils::read_to_string;

-pub fn run(files: Vec<String>) -> CommandResult {
+pub fn run(files: &[String]) -> CommandResult {
    for (i, f) in files.into_iter().enumerate() {
        if i != 0 {
            println!();
@@ -24,74 +18,8 @@ pub fn run(files: Vec<String>) -> CommandResult {
    Ok(())
 }

-struct CFGPrinter<'a> {
-    func: &'a Function,
-    cfg: ControlFlowGraph,
-}
-
-impl<'a> CFGPrinter<'a> {
-    pub fn new(func: &'a Function) -> CFGPrinter<'a> {
-        CFGPrinter {
-            func,
-            cfg: ControlFlowGraph::with_function(func),
-        }
-    }
-
-    /// Write the CFG for this function to `w`.
-    pub fn write(&self, w: &mut Write) -> Result {
-        self.header(w)?;
-        self.ebb_nodes(w)?;
-        self.cfg_connections(w)?;
-        writeln!(w, "}}")
-    }
-
-    fn header(&self, w: &mut Write) -> Result {
-        writeln!(w, "digraph \"{}\" {{", self.func.name)?;
-        if let Some(entry) = self.func.layout.entry_block() {
-            writeln!(w, "    {{rank=min; {}}}", entry)?;
-        }
-        Ok(())
-    }
-
-    fn ebb_nodes(&self, w: &mut Write) -> Result {
-        for ebb in &self.func.layout {
-            write!(w, "    {} [shape=record, label=\"{{{}", ebb, ebb)?;
-            // Add all outgoing branch instructions to the label.
-            for inst in self.func.layout.ebb_insts(ebb) {
-                let idata = &self.func.dfg[inst];
-                match idata.analyze_branch(&self.func.dfg.value_lists) {
-                    BranchInfo::SingleDest(dest, _) => {
-                        write!(w, " | <{}>{} {}", inst, idata.opcode(), dest)?
-                    }
-                    BranchInfo::Table(table) => {
-                        write!(w, " | <{}>{} {}", inst, idata.opcode(), table)?
-                    }
-                    BranchInfo::NotABranch => {}
-                }
-            }
-            writeln!(w, "}}\"]")?
-        }
-        Ok(())
-    }
-
-    fn cfg_connections(&self, w: &mut Write) -> Result {
-        for ebb in &self.func.layout {
-            for (parent, inst) in self.cfg.pred_iter(ebb) {
-                writeln!(w, "    {}:{} -> {}", parent, inst, ebb)?;
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<'a> Display for CFGPrinter<'a> {
-    fn fmt(&self, f: &mut Formatter) -> Result {
-        self.write(f)
-    }
-}
-
-fn print_cfg(filename: String) -> CommandResult {
-    let buffer = read_to_string(&filename).map_err(
+fn print_cfg(filename: &str) -> CommandResult {
+    let buffer = read_to_string(filename).map_err(
        |e| format!("{}: {}", filename, e),
    )?;
    let items = parse_functions(&buffer).map_err(
@@ -107,29 +35,3 @@ fn print_cfg(filename: String) -> CommandResult {

    Ok(())
 }
-
-/// Object implementing the `test print-cfg` sub-test.
-struct TestPrintCfg;
-
-pub fn subtest(parsed: &TestCommand) -> STResult<Box<SubTest>> {
-    assert_eq!(parsed.command, "print-cfg");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestPrintCfg))
-    }
-}
-
-impl SubTest for TestPrintCfg {
-    fn name(&self) -> Cow<str> {
-        Cow::from("print-cfg")
-    }
-
-    fn needs_verifier(&self) -> bool {
-        false
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> STResult<()> {
-        subtest::run_filecheck(&CFGPrinter::new(&func).to_string(), context)
-    }
-}
--- a/cranelift/src/rsfilecheck.rs
+++ b/cranelift/src/rsfilecheck.rs
@@ -1,9 +1,13 @@
-use CommandResult;
-use utils::read_to_string;
-use filecheck::{CheckerBuilder, Checker, NO_VARIABLES};
-use std::io::{self, Read};
+//! The `filecheck` sub-command.
+//!
+//! This file is named to avoid a name collision with the filecheck crate.

-pub fn run(files: Vec<String>, verbose: bool) -> CommandResult {
+use CommandResult;
+use filecheck::{Checker, CheckerBuilder, NO_VARIABLES};
+use std::io::{self, Read};
+use utils::read_to_string;
+
+pub fn run(files: &[String], verbose: bool) -> CommandResult {
    if files.is_empty() {
        return Err("No check files".to_string());
    }
--- a/cranelift/src/utils.rs
+++ b/cranelift/src/utils.rs
@@ -1,13 +1,9 @@
 //! Utility functions.

-use cretonne::ir::entities::AnyEntity;
-use cretonne::{ir, verifier};
-use cretonne::result::CtonError;
+use cretonne::isa;
 use cretonne::isa::TargetIsa;
 use cretonne::settings::{self, FlagsOrIsa};
-use cretonne::isa;
 use cton_reader::{parse_options, Location};
-use std::fmt::Write;
 use std::fs::File;
 use std::io::{self, Read};
 use std::path::Path;
@@ -28,51 +24,6 @@ pub fn read_to_end<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
    Ok(buffer)
 }

-/// Look for a directive in a comment string.
-/// The directive is of the form "foo:" and should follow the leading `;` in the comment:
-///
-/// ; dominates: ebb3 ebb4
-///
-/// Return the comment text following the directive.
-pub fn match_directive<'a>(comment: &'a str, directive: &str) -> Option<&'a str> {
-    assert!(
-        directive.ends_with(':'),
-        "Directive must include trailing colon"
-    );
-    let text = comment.trim_left_matches(';').trim_left();
-    if text.starts_with(directive) {
-        Some(text[directive.len()..].trim())
-    } else {
-        None
-    }
-}
-
-/// Pretty-print a verifier error.
-pub fn pretty_verifier_error(
-    func: &ir::Function,
-    isa: Option<&TargetIsa>,
-    err: verifier::Error,
-) -> String {
-    let mut msg = err.to_string();
-    match err.location {
-        AnyEntity::Inst(inst) => {
-            write!(msg, "\n{}: {}\n\n", inst, func.dfg.display_inst(inst, isa)).unwrap()
-        }
-        _ => msg.push('\n'),
-    }
-    write!(msg, "{}", func.display(isa)).unwrap();
-    msg
-}
-
-/// Pretty-print a Cretonne error.
-pub fn pretty_error(func: &ir::Function, isa: Option<&TargetIsa>, err: CtonError) -> String {
-    if let CtonError::Verifier(e) = err {
-        pretty_verifier_error(func, isa, e)
-    } else {
-        err.to_string()
-    }
-}
-
 /// Like `FlagsOrIsa`, but holds ownership.
 pub enum OwnedFlagsOrIsa {
    Flags(settings::Flags),
@@ -90,10 +41,7 @@ impl OwnedFlagsOrIsa {
 }

 /// Parse "set" and "isa" commands.
-pub fn parse_sets_and_isa(
-    flag_set: Vec<String>,
-    flag_isa: String,
-) -> Result<OwnedFlagsOrIsa, String> {
+pub fn parse_sets_and_isa(flag_set: &[String], flag_isa: &str) -> Result<OwnedFlagsOrIsa, String> {
    let mut flag_builder = settings::builder();
    parse_options(
        flag_set.iter().map(|x| x.as_str()),
@@ -119,12 +67,3 @@ pub fn parse_sets_and_isa(
        Ok(OwnedFlagsOrIsa::Flags(settings::Flags::new(&flag_builder)))
    }
 }
-
-#[test]
-fn test_match_directive() {
-    assert_eq!(match_directive("; foo: bar  ", "foo:"), Some("bar"));
-    assert_eq!(match_directive(" foo:bar", "foo:"), Some("bar"));
-    assert_eq!(match_directive("foo:bar", "foo:"), Some("bar"));
-    assert_eq!(match_directive(";x foo: bar", "foo:"), None);
-    assert_eq!(match_directive(";;; foo: bar", "foo:"), Some("bar"));
-}
--- a/cranelift/src/wasm.rs
+++ b/cranelift/src/wasm.rs
@@ -1,19 +1,21 @@
 //! CLI tool to use the functions provided by the [cretonne-wasm](../cton_wasm/index.html) crate.
 //!
-//! Reads Wasm binary files, translates the functions' code to Cretonne IL.
+//! Reads Wasm binary files, translates the functions' code to Cretonne IR.
+#![cfg_attr(feature = "cargo-clippy", allow(too_many_arguments, cyclomatic_complexity))]

-use cton_wasm::{translate_module, DummyEnvironment, ModuleEnvironment};
-use std::path::PathBuf;
 use cretonne::Context;
+use cretonne::print_errors::{pretty_error, pretty_verifier_error};
 use cretonne::settings::FlagsOrIsa;
-use std::fs::File;
+use cton_wasm::{translate_module, DummyEnvironment, ModuleEnvironment};
 use std::error::Error;
+use std::fs::File;
 use std::io;
 use std::path::Path;
+use std::path::PathBuf;
 use std::process::Command;
 use tempdir::TempDir;
 use term;
-use utils::{pretty_verifier_error, pretty_error, parse_sets_and_isa, read_to_end};
+use utils::{parse_sets_and_isa, read_to_end};

 macro_rules! vprintln {
    ($x: expr, $($tts:tt)*) => {
@@ -37,8 +39,8 @@ pub fn run(
    flag_just_decode: bool,
    flag_check_translation: bool,
    flag_print: bool,
-    flag_set: Vec<String>,
-    flag_isa: String,
+    flag_set: &[String],
+    flag_isa: &str,
    flag_print_size: bool,
 ) -> Result<(), String> {
    let parsed = parse_sets_and_isa(flag_set, flag_isa)?;
@@ -52,8 +54,8 @@ pub fn run(
            flag_check_translation,
            flag_print,
            flag_print_size,
-            path.to_path_buf(),
-            name,
+            &path.to_path_buf(),
+            &name,
            parsed.as_fisa(),
        )?;
    }
@@ -66,8 +68,8 @@ fn handle_module(
    flag_check_translation: bool,
    flag_print: bool,
    flag_print_size: bool,
-    path: PathBuf,
-    name: String,
+    path: &PathBuf,
+    name: &str,
    fisa: FlagsOrIsa,
 ) -> Result<(), String> {
    let mut terminal = term::stdout().unwrap();
@@ -152,10 +154,9 @@ fn handle_module(
        context.func = func.clone();
        if flag_check_translation {
            context.verify(fisa).map_err(|err| {
-                pretty_verifier_error(&context.func, fisa.isa, err)
+                pretty_verifier_error(&context.func, fisa.isa, &err)
            })?;
-        } else {
-            if let Some(isa) = fisa.isa {
+        } else if let Some(isa) = fisa.isa {
            let compiled_size = context.compile(isa).map_err(|err| {
                pretty_error(&context.func, fisa.isa, err)
            })?;
@@ -169,13 +170,12 @@ fn handle_module(
                println!(
                    "Function #{} bytecode size: {} bytes",
                    func_index,
-                        dummy_environ.func_bytecode_sizes[func_index]
+                    dummy_environ.func_bytecode_sizes[def_index]
                );
            }
        } else {
            return Err(String::from("compilation requires a target isa"));
        }
-        }
        if flag_print {
            vprintln!(flag_verbose, "");
            if let Some(start_func) = dummy_environ.info.start_func {
@@ -193,10 +193,7 @@ fn handle_module(

    if !flag_check_translation && flag_print_size {
        println!("Total module code size: {} bytes", total_module_code_size);
-        let total_bytecode_size = dummy_environ.func_bytecode_sizes.iter().fold(
-            0,
-            |sum, x| sum + x,
-        );
+        let total_bytecode_size: usize = dummy_environ.func_bytecode_sizes.iter().sum();
        println!("Total module bytecode size: {} bytes", total_bytecode_size);
    }

--- a/cranelift/test-all.sh
+++ b/cranelift/test-all.sh
@@ -3,11 +3,10 @@ set -euo pipefail

 # This is the top-level test script:
 #
-# - Build documentation for Rust code in 'src/tools/target/doc'.
-# - Run unit tests for all Rust crates.
-# - Make a debug build of all crates.
-# - Make a release build of cton-util.
-# - Run file-level tests with the release build of cton-util.
+# - Make a debug build.
+# - Make a release build.
+# - Run unit tests for all Rust crates (including the filetests).
+# - Build API documentation.
 #
 # All tests run by this script should be passing at all times.

@@ -42,22 +41,26 @@ if [ -n "$needcheck" ]; then
    touch $tsfile || echo no target directory
 fi

-cd "$topdir"
-banner "Rust unit tests"
-cargo test --all
+# Make sure the code builds in debug mode.
+banner "Rust debug build"
+cargo build

-# Build cton-util for parser testing.
-cd "$topdir"
-banner "Rust documentation"
-echo "open $topdir/target/doc/cretonne/index.html"
+# Make sure the code builds in release mode, and run the unit tests. We run
+# these in release mode for speed, but note that the top-level Cargo.toml file
+# does enable debug assertions in release builds.
+banner "Rust release build and unit tests"
+cargo test --all --release
+
+# Make sure the documentation builds.
+banner "Rust documentation: $topdir/target/doc/cretonne/index.html"
 cargo doc
-banner "Rust release build"
-cargo build --release

-export CTONUTIL="$topdir/target/release/cton-util"
-
-cd "$topdir"
-banner "File tests"
-"$CTONUTIL" test filetests docs
+# Run clippy if we have it.
+banner "Rust linter"
+if $topdir/check-clippy.sh; then
+    $topdir/clippy-all.sh --write-mode=diff
+else
+    echo "\`cargo +nightly install clippy\` for optional rust linting"
+fi

 banner "OK"
--- a/cranelift/tests/filetests.rs
+++ b/cranelift/tests/filetests.rs
@@ -0,0 +1,7 @@
+extern crate cton_filetests;
+
+#[test]
+fn filetests() {
+    // Run all the filetests in the following directories.
+    cton_filetests::run(false, &["filetests".into(), "docs".into()]).expect("test harness");
+}
--- a/lib/cretonne/Cargo.toml
+++ b/lib/cretonne/Cargo.toml
@@ -1,13 +1,13 @@
 [package]
 authors = ["The Cretonne Project Developers"]
 name = "cretonne"
-version = "0.3.4"
+version = "0.4.1"
 description = "Low-level code generator library"
 license = "Apache-2.0"
 documentation = "https://cretonne.readthedocs.io/"
 repository = "https://github.com/Cretonne/cretonne"
 readme = "README.md"
-keywords = [ "compile", "compiler", "jit" ]
+keywords = ["compile", "compiler", "jit"]
 build = "build.rs"

 [lib]
@@ -32,3 +32,7 @@ optional = true
 default = ["std"]
 std = []
 core = ["hashmap_core"]
+
+[badges]
+maintenance = { status = "experimental" }
+travis-ci = { repository = "Cretonne/cretonne" }
--- a/lib/cretonne/README.md
+++ b/lib/cretonne/README.md
@@ -1,2 +1,2 @@
 This crate contains the core Cretonne code generator. It translates code from an
-intermediate language into executable machine code.
+intermediate representation into executable machine code.
--- a/lib/cretonne/build.rs
+++ b/lib/cretonne/build.rs
@@ -18,7 +18,6 @@
 // The build script expects to be run from the directory where this build.rs file lives. The
 // current directory is used to find the sources.

-
 use std::env;
 use std::process;

--- a/lib/cretonne/meta/base/formats.py
+++ b/lib/cretonne/meta/base/formats.py
@@ -2,7 +2,7 @@
 The cretonne.formats defines all instruction formats.

 Every instruction format has a corresponding `InstructionData` variant in the
-Rust representation of cretonne IL, so all instruction formats must be defined
+Rust representation of Cretonne IR, so all instruction formats must be defined
 in this module.
 """
 from __future__ import absolute_import
--- a/lib/cretonne/meta/base/instructions.py
+++ b/lib/cretonne/meta/base/instructions.py
@@ -588,6 +588,9 @@ stack_check = Instruction(
    Read the stack limit from ``GV`` and compare it to the stack pointer. If
    the stack pointer has reached or exceeded the limit, generate a trap with a
    ``stk_ovf`` code.
+
+    The global variable must be accessible and naturally aligned for a
+    pointer-sized value.
    """,
    ins=GV, can_trap=True)

--- a/lib/cretonne/meta/base/legalize.py
+++ b/lib/cretonne/meta/base/legalize.py
@@ -41,6 +41,8 @@ widen = XFormGroup('widen', """

        The transformations in the 'widen' group work by expressing
        instructions in terms of larger types.
+
+        This group is not yet implemented.
        """)

 expand = XFormGroup('expand', """
--- a/lib/cretonne/meta/base/settings.py
+++ b/lib/cretonne/meta/base/settings.py
@@ -20,10 +20,10 @@ opt_level = EnumSetting(

 enable_verifier = BoolSetting(
        """
-        Run the Cretonne IL verifier at strategic times during compilation.
+        Run the Cretonne IR verifier at strategic times during compilation.

        This makes compilation slower but catches many bugs. The verifier is
-        disabled by default, except when reading Cretonne IL from a text file.
+        disabled by default, except when reading Cretonne IR from a text file.
        """,
        default=True)

--- a/lib/cretonne/meta/build.py
+++ b/lib/cretonne/meta/build.py
@@ -14,19 +14,27 @@ import gen_legalizer
 import gen_registers
 import gen_binemit

-parser = argparse.ArgumentParser(description='Generate sources for Cretonne.')
-parser.add_argument('--out-dir', help='set output directory')

-args = parser.parse_args()
-out_dir = args.out_dir
+def main():
+    # type: () -> None
+    parser = argparse.ArgumentParser(
+            description='Generate sources for Cretonne.')
+    parser.add_argument('--out-dir', help='set output directory')

-isas = isa.all_isas()
+    args = parser.parse_args()
+    out_dir = args.out_dir

-gen_types.generate(out_dir)
-gen_instr.generate(isas, out_dir)
-gen_settings.generate(isas, out_dir)
-gen_encoding.generate(isas, out_dir)
-gen_legalizer.generate(isas, out_dir)
-gen_registers.generate(isas, out_dir)
-gen_binemit.generate(isas, out_dir)
-gen_build_deps.generate()
+    isas = isa.all_isas()
+
+    gen_types.generate(out_dir)
+    gen_instr.generate(isas, out_dir)
+    gen_settings.generate(isas, out_dir)
+    gen_encoding.generate(isas, out_dir)
+    gen_legalizer.generate(isas, out_dir)
+    gen_registers.generate(isas, out_dir)
+    gen_binemit.generate(isas, out_dir)
+    gen_build_deps.generate()
+
+
+if __name__ == "__main__":
+    main()
--- a/lib/cretonne/meta/cdsl/ast.py
+++ b/lib/cretonne/meta/cdsl/ast.py
@@ -559,7 +559,7 @@ class Enumerator(Literal):
    is an AST leaf node representing one of the values.

    :param kind: The enumerated `ImmediateKind` containing the value.
-    :param value: The textual IL representation of the value.
+    :param value: The textual IR representation of the value.

    `Enumerator` nodes are not usually created directly. They are created by
    using the dot syntax on immediate kinds: `intcc.ult`.
--- a/lib/cretonne/meta/cdsl/isa.py
+++ b/lib/cretonne/meta/cdsl/isa.py
@@ -12,7 +12,7 @@ from .instructions import InstructionGroup
 try:
    from typing import Tuple, Union, Any, Iterable, Sequence, List, Set, Dict, TYPE_CHECKING  # noqa
    if TYPE_CHECKING:
-        from .instructions import MaybeBoundInst, InstructionGroup, InstructionFormat  # noqa
+        from .instructions import MaybeBoundInst, InstructionFormat  # noqa
        from .predicates import PredNode, PredKey  # noqa
        from .settings import SettingGroup  # noqa
        from .registers import RegBank  # noqa
@@ -172,8 +172,7 @@ class TargetISA(object):
        """
        for cpumode in self.cpumodes:
            self.legalize_code(cpumode.default_legalize)
-            for x in sorted(cpumode.type_legalize.values(),
-                            key=lambda x: x.name):
+            for x in cpumode.type_legalize.values():
                self.legalize_code(x)

    def legalize_code(self, xgrp):
@@ -232,7 +231,7 @@ class CPUMode(object):
        # Tables for configuring legalization actions when no valid encoding
        # exists for an instruction.
        self.default_legalize = None  # type: XFormGroup
-        self.type_legalize = dict()  # type: Dict[ValueType, XFormGroup]
+        self.type_legalize = OrderedDict()  # type: OrderedDict[ValueType, XFormGroup]  # noqa

    def __str__(self):
        # type: () -> str
--- a/lib/cretonne/meta/check.sh
+++ b/lib/cretonne/meta/check.sh
@@ -2,7 +2,7 @@
 set -euo pipefail
 cd $(dirname "$0")

-runif() {
+function runif() {
    if command -v "$1" > /dev/null; then
        echo "   === $1 ==="
        "$@"
--- a/lib/cretonne/meta/gen_binemit.py
+++ b/lib/cretonne/meta/gen_binemit.py
@@ -152,7 +152,7 @@ def gen_isa(isa, fmt):
            fmt.line('let bits = encoding.bits();')
            with fmt.indented('match func.encodings[inst].recipe() {', '}'):
                for i, recipe in enumerate(isa.all_recipes):
-                    fmt.comment(recipe.name)
+                    fmt.comment('Recipe {}'.format(recipe.name))
                    with fmt.indented('{} => {{'.format(i), '}'):
                        gen_recipe(recipe, fmt)
                fmt.line('_ => {},')
--- a/lib/cretonne/meta/gen_encoding.py
+++ b/lib/cretonne/meta/gen_encoding.py
@@ -600,8 +600,8 @@ def make_tables(cpumode):
        table[ty][inst].encodings.append(enc)

    # Ensure there are level 1 table entries for all types with a custom
-    # legalize action. Try to be stable relative to dict ordering.
-    for ty in sorted(cpumode.type_legalize.keys(), key=str):
+    # legalize action.
+    for ty in cpumode.type_legalize.keys():
        table[ty]

    return table
@@ -756,7 +756,7 @@ def emit_recipe_constraints(isa, fmt):
            'static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = ['
            .format(len(isa.all_recipes)), '];'):
        for r in isa.all_recipes:
-            fmt.comment(r.name)
+            fmt.comment('Constraints for recipe {}:'.format(r.name))
            tied_i2o, tied_o2i = r.ties()
            fixed_ins, fixed_outs = r.fixed_ops()
            with fmt.indented('RecipeConstraints {', '},'):
@@ -830,7 +830,7 @@ def emit_recipe_sizing(isa, fmt):
            'static RECIPE_SIZING: [RecipeSizing; {}] = ['
            .format(len(isa.all_recipes)), '];'):
        for r in isa.all_recipes:
-            fmt.comment(r.name)
+            fmt.comment('Code size information for recipe {}:'.format(r.name))
            with fmt.indented('RecipeSizing {', '},'):
                fmt.format('bytes: {},', r.size)
                if r.branch_range:
--- a/lib/cretonne/meta/gen_instr.py
+++ b/lib/cretonne/meta/gen_instr.py
@@ -49,11 +49,11 @@ def gen_formats(fmt):
        with fmt.indented(
                "fn from(inst: &'a InstructionData) -> InstructionFormat {",
                '}'):
-            with fmt.indented('match *inst {', '}'):
+            m = srcgen.Match('*inst')
            for f in InstructionFormat.all_formats:
-                    fmt.line(('InstructionData::{} {{ .. }} => ' +
-                              'InstructionFormat::{},')
-                             .format(f.name, f.name))
+                m.arm('InstructionData::' + f.name, ['..'],
+                      'InstructionFormat::' + f.name)
+            fmt.match(m)
    fmt.line()


@@ -74,7 +74,7 @@ def gen_arguments_method(fmt, is_mut):
            'pool: &\'a {m}ir::ValueListPool) -> '
            '&{m}[Value] {{'
            .format(f=method, m=mut), '}'):
-        with fmt.indented('match *self {', '}'):
+        m = srcgen.Match('*self')
        for f in InstructionFormat.all_formats:
            n = 'InstructionData::' + f.name

@@ -82,25 +82,56 @@ def gen_arguments_method(fmt, is_mut):
            # list. We don't split them up, just return it all as variable
            # arguments. (I expect the distinction to go away).
            if f.has_value_list:
-                    arg = ''.format(mut)
-                    fmt.line(
-                        '{} {{ ref {}args, .. }} => args.{}(pool),'
-                        .format(n, mut, as_slice))
+                m.arm(n, ['ref {}args'.format(mut), '..'],
+                      'args.{}(pool)'.format(as_slice))
                continue

            # Fixed args.
+            fields = []
            if f.num_value_operands == 0:
                arg = '&{}[]'.format(mut)
-                    capture = ''
            elif f.num_value_operands == 1:
-                    capture = 'ref {}arg, '.format(mut)
+                fields.append('ref {}arg'.format(mut))
                arg = '{}(arg)'.format(rslice)
            else:
-                    capture = 'ref {}args, '.format(mut)
-                    arg = 'args'
+                args = 'args_arity{}'.format(f.num_value_operands)
+                fields.append('args: ref {}{}'.format(mut, args))
+                arg = args
+            fields.append('..')
+            m.arm(n, fields, arg)
+        fmt.match(m)
+
+
+def gen_instruction_data(fmt):
+    # type: (srcgen.Formatter) -> None
+    """
+    Generate the InstructionData enum.
+
+    Every variant must contain `opcode` and `ty` fields. An instruction that
+    doesn't produce a value should have its `ty` field set to `VOID`. The size
+    of `InstructionData` should be kept at 16 bytes on 64-bit architectures. If
+    more space is needed to represent an instruction, use a `Box<AuxData>` to
+    store the additional information out of line.
+    """
+
+    fmt.line('#[derive(Clone, Debug, Hash, PartialEq, Eq)]')
+    fmt.line('#[allow(missing_docs)]')
+    with fmt.indented('pub enum InstructionData {', '}'):
+        for f in InstructionFormat.all_formats:
+            with fmt.indented('{} {{'.format(f.name), '},'):
+                fmt.line('opcode: Opcode,')
+                if f.typevar_operand is None:
+                    pass
+                elif f.has_value_list:
+                    fmt.line('args: ValueList,')
+                elif f.num_value_operands == 1:
+                    fmt.line('arg: Value,')
+                else:
+                    fmt.line('args: [Value; {}],'.format(f.num_value_operands))
+                for field in f.imm_fields:
                    fmt.line(
-                        '{} {{ {}.. }} => {},'
-                        .format(n, capture, arg))
+                            '{}: {},'
+                            .format(field.member, field.kind.rust_type))


 def gen_instruction_data_impl(fmt):
@@ -123,39 +154,37 @@ def gen_instruction_data_impl(fmt):
    with fmt.indented('impl InstructionData {', '}'):
        fmt.doc_comment('Get the opcode of this instruction.')
        with fmt.indented('pub fn opcode(&self) -> Opcode {', '}'):
-            with fmt.indented('match *self {', '}'):
+            m = srcgen.Match('*self')
            for f in InstructionFormat.all_formats:
-                    fmt.line(
-                            'InstructionData::{} {{ opcode, .. }} => opcode,'
-                            .format(f.name))
+                m.arm('InstructionData::' + f.name, ['opcode', '..'],
+                      'opcode')
+            fmt.match(m)
        fmt.line()

        fmt.doc_comment('Get the controlling type variable operand.')
        with fmt.indented(
                'pub fn typevar_operand(&self, pool: &ir::ValueListPool) -> '
                'Option<Value> {', '}'):
-            with fmt.indented('match *self {', '}'):
+            m = srcgen.Match('*self')
            for f in InstructionFormat.all_formats:
                n = 'InstructionData::' + f.name
                if f.typevar_operand is None:
-                        fmt.line(n + ' { .. } => None,')
+                    m.arm(n, ['..'], 'None')
                elif f.has_value_list:
                    # We keep all arguments in a value list.
                    i = f.typevar_operand
-                        fmt.line(
-                                '{} {{ ref args, .. }} => '
-                                'args.get({}, pool),'.format(n, i))
+                    m.arm(n, ['ref args', '..'],
+                          'args.get({}, pool)'.format(i))
                elif f.num_value_operands == 1:
                    # We have a single value operand called 'arg'.
-                        fmt.line(n + ' { arg, .. } => Some(arg),')
+                    m.arm(n, ['arg', '..'], 'Some(arg)')
                else:
                    # We have multiple value operands and an array `args`.
                    # Which `args` index to use?
-                        i = f.typevar_operand
-                        fmt.line(
-                                n +
-                                ' {{ ref args, .. }} => Some(args[{}]),'
-                                .format(i))
+                    args = 'args_arity{}'.format(f.num_value_operands)
+                    m.arm(n, ['args: ref {}'.format(args), '..'],
+                          'Some({}[{}])'.format(args, f.typevar_operand))
+            fmt.match(m)
        fmt.line()

        fmt.doc_comment(
@@ -184,13 +213,13 @@ def gen_instruction_data_impl(fmt):
        with fmt.indented(
                'pub fn take_value_list(&mut self) -> Option<ir::ValueList> {',
                '}'):
-            with fmt.indented('match *self {', '}'):
+            m = srcgen.Match('*self')
            for f in InstructionFormat.all_formats:
                n = 'InstructionData::' + f.name
                if f.has_value_list:
-                        fmt.line(
-                            n + ' { ref mut args, .. } => Some(args.take()),')
-                fmt.line('_ => None,')
+                    m.arm(n, ['ref mut args', '..'], 'Some(args.take())')
+            m.arm('_', [], 'None')
+            fmt.match(m)
        fmt.line()

        fmt.doc_comment(
@@ -275,14 +304,12 @@ def gen_opcodes(groups, fmt):
            fmt.doc_comment(Instruction.ATTRIBS[attr])
            with fmt.indented('pub fn {}(self) -> bool {{'
                              .format(attr), '}'):
-                with fmt.indented('match self {', '}'):
+                m = srcgen.Match('self')
                for i in instrs:
                    if getattr(i, attr):
-                            fmt.format(
-                                    'Opcode::{} => true,',
-                                    i.camel_name, i.name)
-
-                    fmt.line('_ => false,')
+                        m.arm('Opcode::' + i.camel_name, [], 'true')
+                m.arm('_', [], 'false')
+                fmt.match(m)
            fmt.line()
    fmt.line()

@@ -299,9 +326,10 @@ def gen_opcodes(groups, fmt):

    # Generate a private opcode_name function.
    with fmt.indented('fn opcode_name(opc: Opcode) -> &\'static str {', '}'):
-        with fmt.indented('match opc {', '}'):
+        m = srcgen.Match('opc')
        for i in instrs:
-                fmt.format('Opcode::{} => "{}",', i.camel_name, i.name)
+            m.arm('Opcode::' + i.camel_name, [], '"{}"'.format(i.name))
+        fmt.match(m)
    fmt.line()

    # Generate an opcode hash table for looking up opcodes by name.
@@ -655,7 +683,7 @@ def gen_builder(insts, fmt):
    fmt.doc_comment("""
            Convenience methods for building instructions.

-            The `InstrBuilder` trait has one method per instruction opcode for
+            The `InstBuilder` trait has one method per instruction opcode for
            conveniently constructing the instruction with minimum arguments.
            Polymorphic instructions infer their result types from the input
            arguments when possible. In some cases, an explicit `ctrl_typevar`
@@ -682,13 +710,15 @@ def generate(isas, out_dir):
    # opcodes.rs
    fmt = srcgen.Formatter()
    gen_formats(fmt)
+    gen_instruction_data(fmt)
+    fmt.line()
    gen_instruction_data_impl(fmt)
    fmt.line()
    instrs = gen_opcodes(groups, fmt)
    gen_type_constraints(fmt, instrs)
    fmt.update_file('opcodes.rs', out_dir)

-    # builder.rs
+    # inst_builder.rs
    fmt = srcgen.Formatter()
    gen_builder(instrs, fmt)
-    fmt.update_file('builder.rs', out_dir)
+    fmt.update_file('inst_builder.rs', out_dir)
--- a/lib/cretonne/meta/gen_legalizer.py
+++ b/lib/cretonne/meta/gen_legalizer.py
@@ -103,18 +103,19 @@ def emit_runtime_typecheck(check, fmt, type_sets):

        base_exp = build_derived_expr(tv.base)
        if (tv.derived_func == TypeVar.LANEOF):
-            return "{}.map(|t: Type| -> t.lane_type())".format(base_exp)
+            return "{}.map(|t: ir::Type| t.lane_type())".format(base_exp)
        elif (tv.derived_func == TypeVar.ASBOOL):
-            return "{}.map(|t: Type| -> t.as_bool())".format(base_exp)
+            return "{}.map(|t: ir::Type| t.as_bool())".format(base_exp)
        elif (tv.derived_func == TypeVar.HALFWIDTH):
-            return "{}.and_then(|t: Type| -> t.half_width())".format(base_exp)
+            return "{}.and_then(|t: ir::Type| t.half_width())".format(base_exp)
        elif (tv.derived_func == TypeVar.DOUBLEWIDTH):
-            return "{}.and_then(|t: Type| -> t.double_width())"\
+            return "{}.and_then(|t: ir::Type| t.double_width())"\
                .format(base_exp)
        elif (tv.derived_func == TypeVar.HALFVECTOR):
-            return "{}.and_then(|t: Type| -> t.half_vector())".format(base_exp)
+            return "{}.and_then(|t: ir::Type| t.half_vector())"\
+                .format(base_exp)
        elif (tv.derived_func == TypeVar.DOUBLEVECTOR):
-            return "{}.and_then(|t: Type| -> t.by(2))".format(base_exp)
+            return "{}.and_then(|t: ir::Type| t.by(2))".format(base_exp)
        else:
            assert False, "Unknown derived function {}".format(tv.derived_func)

--- a/lib/cretonne/meta/gen_settings.py
+++ b/lib/cretonne/meta/gen_settings.py
@@ -28,7 +28,7 @@ def gen_enum_types(sgrp, fmt):
        if not isinstance(setting, EnumSetting):
            continue
        ty = camel_case(setting.name)
-        fmt.doc_comment('Values for {}.'.format(setting))
+        fmt.doc_comment('Values for `{}`.'.format(setting))
        fmt.line('#[derive(Debug, PartialEq, Eq)]')
        with fmt.indented('pub enum {} {{'.format(ty), '}'):
            for v in setting.values:
@@ -57,12 +57,11 @@ def gen_getter(setting, sgrp, fmt):
        ty = camel_case(setting.name)
        proto = 'pub fn {}(&self) -> {}'.format(setting.name, ty)
        with fmt.indented(proto + ' {', '}'):
-            with fmt.indented(
-                    'match self.bytes[{}] {{'
-                    .format(setting.byte_offset), '}'):
+            m = srcgen.Match('self.bytes[{}]'.format(setting.byte_offset))
            for i, v in enumerate(setting.values):
-                    fmt.line('{} => {}::{},'.format(i, ty, camel_case(v)))
-                fmt.line('_ => panic!("Invalid enum value"),')
+                m.arm(str(i), [], '{}::{}'.format(ty, camel_case(v)))
+            m.arm('_', [], 'panic!("Invalid enum value")')
+            fmt.match(m)
    else:
        raise AssertionError("Unknown setting kind")

--- a/lib/cretonne/meta/isa/intel/defs.py
+++ b/lib/cretonne/meta/isa/intel/defs.py
@@ -12,8 +12,8 @@ from base.immediates import floatcc
 ISA = TargetISA('intel', [base.instructions.GROUP, x86.GROUP])

 # CPU modes for 32-bit and 64-bit operation.
-I64 = CPUMode('I64', ISA)
-I32 = CPUMode('I32', ISA)
+X86_64 = CPUMode('I64', ISA)
+X86_32 = CPUMode('I32', ISA)

 # The set of floating point condition codes that are directly supported.
 # Other condition codes need to be reversed or expressed as two tests.
--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -5,7 +5,7 @@ from __future__ import absolute_import
 from cdsl.predicates import IsUnsignedInt, Not, And
 from base import instructions as base
 from base.formats import UnaryImm
-from .defs import I32, I64
+from .defs import X86_64, X86_32
 from . import recipes as r
 from . import settings as cfg
 from . import instructions as x86
@@ -22,16 +22,16 @@ except ImportError:
    pass


-I32.legalize_monomorphic(expand_flags)
-I32.legalize_type(
+X86_32.legalize_monomorphic(expand_flags)
+X86_32.legalize_type(
    default=narrow,
    b1=expand_flags,
    i32=intel_expand,
    f32=intel_expand,
    f64=intel_expand)

-I64.legalize_monomorphic(expand_flags)
-I64.legalize_type(
+X86_64.legalize_monomorphic(expand_flags)
+X86_64.legalize_type(
    default=narrow,
    b1=expand_flags,
    i32=intel_expand,
@@ -44,61 +44,61 @@ I64.legalize_type(
 # Helper functions for generating encodings.
 #

-def enc_i64(inst, recipe, *args, **kwargs):
+def enc_x86_64(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
    """
-    Add encodings for `inst` to I64 with and without a REX prefix.
+    Add encodings for `inst` to X86_64 with and without a REX prefix.
    """
-    I64.enc(inst, *recipe.rex(*args, **kwargs))
-    I64.enc(inst, *recipe(*args, **kwargs))
+    X86_64.enc(inst, *recipe.rex(*args, **kwargs))
+    X86_64.enc(inst, *recipe(*args, **kwargs))


 def enc_both(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
    """
-    Add encodings for `inst` to both I32 and I64.
+    Add encodings for `inst` to both X86_32 and X86_64.
    """
-    I32.enc(inst, *recipe(*args, **kwargs))
-    enc_i64(inst, recipe, *args, **kwargs)
+    X86_32.enc(inst, *recipe(*args, **kwargs))
+    enc_x86_64(inst, recipe, *args, **kwargs)


 def enc_i32_i64(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
    """
-    Add encodings for `inst.i32` to I32.
-    Add encodings for `inst.i32` to I64 with and without REX.
-    Add encodings for `inst.i64` to I64 with a REX.W prefix.
+    Add encodings for `inst.i32` to X86_32.
+    Add encodings for `inst.i32` to X86_64 with and without REX.
+    Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    """
-    I32.enc(inst.i32, *recipe(*args, **kwargs))
+    X86_32.enc(inst.i32, *recipe(*args, **kwargs))

    # REX-less encoding must come after REX encoding so we don't use it by
    # default. Otherwise reg-alloc would never use r8 and up.
-    I64.enc(inst.i32, *recipe.rex(*args, **kwargs))
-    I64.enc(inst.i32, *recipe(*args, **kwargs))
+    X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs))
+    X86_64.enc(inst.i32, *recipe(*args, **kwargs))

-    I64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
+    X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))


 def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
    """
-    Add encodings for `inst.i32` to I32.
-    Add encodings for `inst.i32` to I64 with and without REX.
-    Add encodings for `inst.i64` to I64 with a REX prefix, using the `w_bit`
+    Add encodings for `inst.i32` to X86_32.
+    Add encodings for `inst.i32` to X86_64 with and without REX.
+    Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
    argument to determine whether or not to set the REX.W bit.
    """
-    I32.enc(inst.i32.any, *recipe(*args, **kwargs))
+    X86_32.enc(inst.i32.any, *recipe(*args, **kwargs))

    # REX-less encoding must come after REX encoding so we don't use it by
    # default. Otherwise reg-alloc would never use r8 and up.
-    I64.enc(inst.i32.any, *recipe.rex(*args, **kwargs))
-    I64.enc(inst.i32.any, *recipe(*args, **kwargs))
+    X86_64.enc(inst.i32.any, *recipe.rex(*args, **kwargs))
+    X86_64.enc(inst.i32.any, *recipe(*args, **kwargs))

    if w_bit:
-        I64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs))
+        X86_64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs))
    else:
-        I64.enc(inst.i64.any, *recipe.rex(*args, **kwargs))
-        I64.enc(inst.i64.any, *recipe(*args, **kwargs))
+        X86_64.enc(inst.i64.any, *recipe.rex(*args, **kwargs))
+        X86_64.enc(inst.i64.any, *recipe(*args, **kwargs))


 for inst,           opc in [
@@ -141,19 +141,22 @@ for inst,               rrr in [
 # band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.

 # Immediate constants.
-I32.enc(base.iconst.i32, *r.puid(0xb8))
+X86_32.enc(base.iconst.i32, *r.puid(0xb8))

-I64.enc(base.iconst.i32, *r.puid.rex(0xb8))
-I64.enc(base.iconst.i32, *r.puid(0xb8))
+X86_64.enc(base.iconst.i32, *r.puid.rex(0xb8))
+X86_64.enc(base.iconst.i32, *r.puid(0xb8))
 # The 32-bit immediate movl also zero-extends to 64 bits.
-I64.enc(base.iconst.i64, *r.puid.rex(0xb8),
+X86_64.enc(base.iconst.i64, *r.puid.rex(0xb8),
           instp=IsUnsignedInt(UnaryImm.imm, 32))
-I64.enc(base.iconst.i64, *r.puid(0xb8),
+X86_64.enc(base.iconst.i64, *r.puid(0xb8),
           instp=IsUnsignedInt(UnaryImm.imm, 32))
 # Sign-extended 32-bit immediate.
-I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
+X86_64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
 # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
-I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
+X86_64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
+
+# bool constants.
+enc_both(base.bconst.b1, r.puid_bool, 0xb8)

 # Shifts and rotates.
 # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
@@ -164,38 +167,46 @@ for inst,           rrr in [
        (base.ishl, 4),
        (base.ushr, 5),
        (base.sshr, 7)]:
-    I32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
-    I64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1))
-    I64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr))
-    I64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
+    # Cannot use enc_i32_i64 for this pattern because instructions require
+    # .any suffix.
+    X86_32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
+    X86_64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1))
+    X86_64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr))
+    X86_64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
+
+for inst,           rrr in [
+        (base.ishl_imm, 4),
+        (base.ushr_imm, 5),
+        (base.sshr_imm, 7)]:
+    enc_i32_i64(inst, r.rib, 0xc1, rrr=rrr)

 # Population count.
-I32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
-I64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),
+X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
+X86_64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),
           isap=cfg.use_popcnt)
-I64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
-I64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
+X86_64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
+X86_64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)

 # Count leading zero bits.
-I32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
-I64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1),
+X86_32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
+X86_64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1),
           isap=cfg.use_lzcnt)
-I64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
-I64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
+X86_64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
+X86_64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)

 # Count trailing zero bits.
-I32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
-I64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
+X86_32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
+X86_64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
           isap=cfg.use_bmi1)
-I64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
-I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
+X86_64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
+X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)

 #
 # Loads and stores.
 #
 for recipe in [r.st, r.stDisp8, r.stDisp32]:
    enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
-    enc_i64(base.istore32.i64.any, recipe, 0x89)
+    enc_x86_64(base.istore32.i64.any, recipe, 0x89)
    enc_i32_i64_ld_st(base.istore16, False, recipe, 0x66, 0x89)

 # Byte stores are more complicated because the registers they can address
@@ -203,121 +214,121 @@ for recipe in [r.st, r.stDisp8, r.stDisp32]:
 # the corresponding st* recipes when a REX prefix is applied.
 for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]:
    enc_both(base.istore8.i32.any, recipe, 0x88)
-    enc_i64(base.istore8.i64.any, recipe, 0x88)
+    enc_x86_64(base.istore8.i64.any, recipe, 0x88)

-enc_i32_i64(base.spill, r.spSib32, 0x89)
-enc_i32_i64(base.regspill, r.rsp32, 0x89)
+enc_i32_i64(base.spill, r.spillSib32, 0x89)
+enc_i32_i64(base.regspill, r.regspill32, 0x89)

 # Use a 32-bit write for spilling `b1` to avoid constraining the permitted
 # registers.
 # See MIN_SPILL_SLOT_SIZE which makes this safe.
-enc_both(base.spill.b1, r.spSib32, 0x89)
-enc_both(base.regspill.b1, r.rsp32, 0x89)
+enc_both(base.spill.b1, r.spillSib32, 0x89)
+enc_both(base.regspill.b1, r.regspill32, 0x89)

 for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
    enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
-    enc_i64(base.uload32.i64, recipe, 0x8b)
-    I64.enc(base.sload32.i64, *recipe.rex(0x63, w=1))
+    enc_x86_64(base.uload32.i64, recipe, 0x8b)
+    X86_64.enc(base.sload32.i64, *recipe.rex(0x63, w=1))
    enc_i32_i64_ld_st(base.uload16, True, recipe, 0x0f, 0xb7)
    enc_i32_i64_ld_st(base.sload16, True, recipe, 0x0f, 0xbf)
    enc_i32_i64_ld_st(base.uload8, True, recipe, 0x0f, 0xb6)
    enc_i32_i64_ld_st(base.sload8, True, recipe, 0x0f, 0xbe)

-enc_i32_i64(base.fill, r.fiSib32, 0x8b)
-enc_i32_i64(base.regfill, r.rfi32, 0x8b)
+enc_i32_i64(base.fill, r.fillSib32, 0x8b)
+enc_i32_i64(base.regfill, r.regfill32, 0x8b)

 # Load 32 bits from `b1` spill slots. See `spill.b1` above.
-enc_both(base.fill.b1, r.fiSib32, 0x8b)
-enc_both(base.regfill.b1, r.rfi32, 0x8b)
+enc_both(base.fill.b1, r.fillSib32, 0x8b)
+enc_both(base.regfill.b1, r.regfill32, 0x8b)

 # Push and Pop
-I32.enc(x86.push.i32, *r.pushq(0x50))
-enc_i64(x86.push.i64, r.pushq, 0x50)
+X86_32.enc(x86.push.i32, *r.pushq(0x50))
+enc_x86_64(x86.push.i64, r.pushq, 0x50)

-I32.enc(x86.pop.i32, *r.popq(0x58))
-enc_i64(x86.pop.i64, r.popq, 0x58)
+X86_32.enc(x86.pop.i32, *r.popq(0x58))
+enc_x86_64(x86.pop.i64, r.popq, 0x58)

 # Copy Special
-I64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
-I32.enc(base.copy_special, *r.copysp(0x89))
+X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
+X86_32.enc(base.copy_special, *r.copysp(0x89))

 # Adjust SP Imm
-I32.enc(base.adjust_sp_imm, *r.adjustsp8(0x83))
-I32.enc(base.adjust_sp_imm, *r.adjustsp32(0x81))
-I64.enc(base.adjust_sp_imm, *r.adjustsp8.rex(0x83, w=1))
-I64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1))
+X86_32.enc(base.adjust_sp_imm, *r.adjustsp8(0x83))
+X86_32.enc(base.adjust_sp_imm, *r.adjustsp32(0x81))
+X86_64.enc(base.adjust_sp_imm, *r.adjustsp8.rex(0x83, w=1))
+X86_64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1))

 #
 # Float loads and stores.
 #

-enc_both(base.load.f32.any, r.fld, 0x66, 0x0f, 0x6e)
-enc_both(base.load.f32.any, r.fldDisp8, 0x66, 0x0f, 0x6e)
-enc_both(base.load.f32.any, r.fldDisp32, 0x66, 0x0f, 0x6e)
+enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
+enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
+enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)

-enc_both(base.load.f64.any, r.fld, 0xf3, 0x0f, 0x7e)
-enc_both(base.load.f64.any, r.fldDisp8, 0xf3, 0x0f, 0x7e)
-enc_both(base.load.f64.any, r.fldDisp32, 0xf3, 0x0f, 0x7e)
+enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
+enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
+enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)

-enc_both(base.store.f32.any, r.fst, 0x66, 0x0f, 0x7e)
-enc_both(base.store.f32.any, r.fstDisp8, 0x66, 0x0f, 0x7e)
-enc_both(base.store.f32.any, r.fstDisp32, 0x66, 0x0f, 0x7e)
+enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
+enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
+enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)

-enc_both(base.store.f64.any, r.fst, 0x66, 0x0f, 0xd6)
-enc_both(base.store.f64.any, r.fstDisp8, 0x66, 0x0f, 0xd6)
-enc_both(base.store.f64.any, r.fstDisp32, 0x66, 0x0f, 0xd6)
+enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
+enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
+enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)

-enc_both(base.fill.f32, r.ffiSib32, 0x66, 0x0f, 0x6e)
-enc_both(base.regfill.f32, r.frfi32, 0x66, 0x0f, 0x6e)
-enc_both(base.fill.f64, r.ffiSib32, 0xf3, 0x0f, 0x7e)
-enc_both(base.regfill.f64, r.frfi32, 0xf3, 0x0f, 0x7e)
+enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
+enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
+enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)
+enc_both(base.regfill.f64, r.fregfill32, 0xf2, 0x0f, 0x10)

-enc_both(base.spill.f32, r.fspSib32, 0x66, 0x0f, 0x7e)
-enc_both(base.regspill.f32, r.frsp32, 0x66, 0x0f, 0x7e)
-enc_both(base.spill.f64, r.fspSib32, 0x66, 0x0f, 0xd6)
-enc_both(base.regspill.f64, r.frsp32, 0x66, 0x0f, 0xd6)
+enc_both(base.spill.f32, r.fspillSib32, 0xf3, 0x0f, 0x11)
+enc_both(base.regspill.f32, r.fregspill32, 0xf3, 0x0f, 0x11)
+enc_both(base.spill.f64, r.fspillSib32, 0xf2, 0x0f, 0x11)
+enc_both(base.regspill.f64, r.fregspill32, 0xf2, 0x0f, 0x11)

 #
 # Function addresses.
 #

-I32.enc(base.func_addr.i32, *r.fnaddr4(0xb8),
+X86_32.enc(base.func_addr.i32, *r.fnaddr4(0xb8),
           isap=Not(allones_funcaddrs))
-I64.enc(base.func_addr.i64, *r.fnaddr8.rex(0xb8, w=1),
+X86_64.enc(base.func_addr.i64, *r.fnaddr8.rex(0xb8, w=1),
           isap=And(Not(allones_funcaddrs), Not(is_pic)))

-I32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8),
+X86_32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8),
           isap=allones_funcaddrs)
-I64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1),
+X86_64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1),
           isap=And(allones_funcaddrs, Not(is_pic)))

-I64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1),
+X86_64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1),
           isap=is_pic)

 #
 # Global addresses.
 #

-I32.enc(base.globalsym_addr.i32, *r.gvaddr4(0xb8))
-I64.enc(base.globalsym_addr.i64, *r.gvaddr8.rex(0xb8, w=1),
+X86_32.enc(base.globalsym_addr.i32, *r.gvaddr4(0xb8))
+X86_64.enc(base.globalsym_addr.i64, *r.gvaddr8.rex(0xb8, w=1),
           isap=Not(is_pic))

-I64.enc(base.globalsym_addr.i64, *r.got_gvaddr8.rex(0x8b, w=1),
+X86_64.enc(base.globalsym_addr.i64, *r.got_gvaddr8.rex(0x8b, w=1),
           isap=is_pic)

 #
 # Call/return
 #
-I32.enc(base.call, *r.call_id(0xe8))
-I64.enc(base.call, *r.call_id(0xe8), isap=Not(is_pic))
-I64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic)
+X86_32.enc(base.call, *r.call_id(0xe8))
+X86_64.enc(base.call, *r.call_id(0xe8), isap=Not(is_pic))
+X86_64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic)

-I32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
-I64.enc(base.call_indirect.i64, *r.call_r.rex(0xff, rrr=2))
-I64.enc(base.call_indirect.i64, *r.call_r(0xff, rrr=2))
+X86_32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
+X86_64.enc(base.call_indirect.i64, *r.call_r.rex(0xff, rrr=2))
+X86_64.enc(base.call_indirect.i64, *r.call_r(0xff, rrr=2))

-I32.enc(base.x_return, *r.ret(0xc3))
-I64.enc(base.x_return, *r.ret(0xc3))
+X86_32.enc(base.x_return, *r.ret(0xc3))
+X86_64.enc(base.x_return, *r.ret(0xc3))

 #
 # Branches
@@ -341,10 +352,10 @@ enc_i32_i64(base.brnz, r.tjccd, 0x85)
 # Branch on a b1 value in a register only looks at the low 8 bits. See also
 # bint encodings below.
 #
-# Start with the worst-case encoding for I32 only. The register allocator can't
-# handle a branch with an ABCD-constrained operand.
-I32.enc(base.brz.b1, *r.t8jccd_long(0x84))
-I32.enc(base.brnz.b1, *r.t8jccd_long(0x85))
+# Start with the worst-case encoding for X86_32 only. The register allocator
+# can't handle a branch with an ABCD-constrained operand.
+X86_32.enc(base.brz.b1, *r.t8jccd_long(0x84))
+X86_32.enc(base.brnz.b1, *r.t8jccd_long(0x85))

 enc_both(base.brz.b1, r.t8jccb_abcd, 0x74)
 enc_both(base.brz.b1, r.t8jccd_abcd, 0x84)
@@ -354,26 +365,28 @@ enc_both(base.brnz.b1, r.t8jccd_abcd, 0x85)
 #
 # Trap as ud2
 #
-I32.enc(base.trap, *r.trap(0x0f, 0x0b))
-I64.enc(base.trap, *r.trap(0x0f, 0x0b))
+X86_32.enc(base.trap, *r.trap(0x0f, 0x0b))
+X86_64.enc(base.trap, *r.trap(0x0f, 0x0b))

 # Using a standard EncRecipe, not the TailRecipe.
-I32.enc(base.trapif, r.trapif, 0)
-I64.enc(base.trapif, r.trapif, 0)
-I32.enc(base.trapff, r.trapff, 0)
-I64.enc(base.trapff, r.trapff, 0)
+X86_32.enc(base.trapif, r.trapif, 0)
+X86_64.enc(base.trapif, r.trapif, 0)
+X86_32.enc(base.trapff, r.trapff, 0)
+X86_64.enc(base.trapff, r.trapff, 0)

 #
 # Comparisons
 #
 enc_i32_i64(base.icmp, r.icscc, 0x39)
+enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7)
+enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7)
 enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
 enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7)
 enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7)
 # TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).

-I32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))
-I64.enc(base.ifcmp_sp.i64, *r.rcmp_sp.rex(0x39, w=1))
+X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))
+X86_64.enc(base.ifcmp_sp.i64, *r.rcmp_sp.rex(0x39, w=1))

 #
 # Convert flags to bool.
@@ -398,66 +411,68 @@ enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
 #
 # This assumes that b1 is represented as an 8-bit low register with the value 0
 # or 1.
-I32.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
-I64.enc(base.bint.i64.b1, *r.urm.rex(0x0f, 0xb6))   # zext to i64 implicit.
-I64.enc(base.bint.i64.b1, *r.urm_abcd(0x0f, 0xb6))  # zext to i64 implicit.
-I64.enc(base.bint.i32.b1, *r.urm.rex(0x0f, 0xb6))
-I64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
+#
+# Encode movzbq as movzbl, because it's equivalent and shorter.
+X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
+X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
+X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
+X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
+X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))

 # Numerical conversions.

 # Reducing an integer is a no-op.
-I32.enc(base.ireduce.i8.i32, r.null, 0)
-I32.enc(base.ireduce.i16.i32, r.null, 0)
-I64.enc(base.ireduce.i8.i32, r.null, 0)
-I64.enc(base.ireduce.i16.i32, r.null, 0)
-I64.enc(base.ireduce.i8.i64, r.null, 0)
-I64.enc(base.ireduce.i16.i64, r.null, 0)
-I64.enc(base.ireduce.i32.i64, r.null, 0)
+X86_32.enc(base.ireduce.i8.i32, r.null, 0)
+X86_32.enc(base.ireduce.i16.i32, r.null, 0)
+X86_64.enc(base.ireduce.i8.i32, r.null, 0)
+X86_64.enc(base.ireduce.i16.i32, r.null, 0)
+X86_64.enc(base.ireduce.i8.i64, r.null, 0)
+X86_64.enc(base.ireduce.i16.i64, r.null, 0)
+X86_64.enc(base.ireduce.i32.i64, r.null, 0)

 # TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
 # instructions for %al/%ax/%eax to %ax/%eax/%rax.

 # movsbl
-I32.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
-I64.enc(base.sextend.i32.i8, *r.urm.rex(0x0f, 0xbe))
-I64.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
+X86_32.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))
+X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
+X86_64.enc(base.sextend.i32.i8, *r.urm_noflags(0x0f, 0xbe))

 # movswl
-I32.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
-I64.enc(base.sextend.i32.i16, *r.urm.rex(0x0f, 0xbf))
-I64.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
+X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
+X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
+X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))

 # movsbq
-I64.enc(base.sextend.i64.i8, *r.urm.rex(0x0f, 0xbe, w=1))
+X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))

 # movswq
-I64.enc(base.sextend.i64.i16, *r.urm.rex(0x0f, 0xbf, w=1))
+X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))

 # movslq
-I64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1))
+X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))

 # movzbl
-I32.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
-I64.enc(base.uextend.i32.i8, *r.urm.rex(0x0f, 0xb6))
-I64.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
+X86_32.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))
+X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
+X86_64.enc(base.uextend.i32.i8, *r.urm_noflags(0x0f, 0xb6))

 # movzwl
-I32.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
-I64.enc(base.uextend.i32.i16, *r.urm.rex(0x0f, 0xb7))
-I64.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
+X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
+X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
+X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))

 # movzbq, encoded as movzbl because it's equivalent and shorter
-I64.enc(base.uextend.i64.i8, *r.urm.rex(0x0f, 0xb6))
-I64.enc(base.uextend.i64.i8, *r.urm(0x0f, 0xb6))
+X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
+X86_64.enc(base.uextend.i64.i8, *r.urm_noflags(0x0f, 0xb6))

 # movzwq, encoded as movzwl because it's equivalent and shorter
-I64.enc(base.uextend.i64.i16, *r.urm.rex(0x0f, 0xb7))
-I64.enc(base.uextend.i64.i16, *r.urm(0x0f, 0xb7))
+X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
+X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))

 # A 32-bit register copy clears the high 32 bits.
-I64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
-I64.enc(base.uextend.i64.i32, *r.umr(0x89))
+X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
+X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))


 #
@@ -469,8 +484,8 @@ enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
 enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)

 # movq
-I64.enc(base.bitcast.f64.i64, *r.frurm.rex(0x66, 0x0f, 0x6e, w=1))
-I64.enc(base.bitcast.i64.f64, *r.rfumr.rex(0x66, 0x0f, 0x7e, w=1))
+X86_64.enc(base.bitcast.f64.i64, *r.frurm.rex(0x66, 0x0f, 0x6e, w=1))
+X86_64.enc(base.bitcast.i64.f64, *r.rfumr.rex(0x66, 0x0f, 0x7e, w=1))

 # movaps
 enc_both(base.copy.f32, r.furm, 0x0f, 0x28)
@@ -492,11 +507,11 @@ enc_both(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)

 # cvttss2si
 enc_both(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
-I64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
+X86_64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))

 # cvttsd2si
 enc_both(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
-I64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
+X86_64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))

 # Exact square roots.
 enc_both(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
--- a/lib/cretonne/meta/isa/intel/recipes.py
+++ b/lib/cretonne/meta/isa/intel/recipes.py
@@ -5,9 +5,11 @@ from __future__ import absolute_import
 from cdsl.isa import EncRecipe
 from cdsl.predicates import IsSignedInt, IsEqual, Or
 from cdsl.registers import RegClass
-from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry, NullAry
+from base.formats import Unary, UnaryImm, UnaryBool, Binary, BinaryImm
+from base.formats import MultiAry, NullAry
 from base.formats import Trap, Call, IndirectCall, Store, Load
-from base.formats import IntCompare, FloatCompare, IntCond, FloatCond
+from base.formats import IntCompare, IntCompareImm, FloatCompare
+from base.formats import IntCond, FloatCond
 from base.formats import IntSelect, IntCondTrap, FloatCondTrap
 from base.formats import Jump, Branch, BranchInt, BranchFloat
 from base.formats import Ternary, FuncAddr, UnaryGlobalVar
@@ -277,23 +279,27 @@ null = EncRecipe('null', Unary, size=0, ins=GPR, outs=0, emit='')
 # XX opcode, no ModR/M.
 trap = TailRecipe(
        'trap', Trap, size=0, ins=(), outs=(),
-        emit='PUT_OP(bits, BASE_REX, sink);')
+        emit='''
+        sink.trap(code, func.srclocs[inst]);
+        PUT_OP(bits, BASE_REX, sink);
+        ''')

 # Macro: conditional jump over a ud2.
 trapif = EncRecipe(
-        'trapif', IntCondTrap, size=4, ins=FLAG.eflags, outs=(),
+        'trapif', IntCondTrap, size=4, ins=FLAG.rflags, outs=(),
        clobbers_flags=False,
        emit='''
        // Jump over a 2-byte ud2.
        sink.put1(0x70 | (icc2opc(cond.inverse()) as u8));
        sink.put1(2);
        // ud2.
+        sink.trap(code, func.srclocs[inst]);
        sink.put1(0x0f);
        sink.put1(0x0b);
        ''')

 trapff = EncRecipe(
-        'trapff', FloatCondTrap, size=4, ins=FLAG.eflags, outs=(),
+        'trapff', FloatCondTrap, size=4, ins=FLAG.rflags, outs=(),
        clobbers_flags=False,
        instp=floatccs(FloatCondTrap),
        emit='''
@@ -301,6 +307,7 @@ trapff = EncRecipe(
        sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8));
        sink.put1(2);
        // ud2.
+        sink.trap(code, func.srclocs[inst]);
        sink.put1(0x0f);
        sink.put1(0x0b);
        ''')
@@ -358,7 +365,7 @@ rfumr = TailRecipe(
        ''')

 # XX /r, but for a unary operator with separate input/output register.
-# RM form.
+# RM form. Clobbers FLAGS.
 urm = TailRecipe(
        'urm', Unary, size=1, ins=GPR, outs=GPR,
        emit='''
@@ -366,10 +373,19 @@ urm = TailRecipe(
        modrm_rr(in_reg0, out_reg0, sink);
        ''')

-# XX /r. Same as urm, but input limited to ABCD.
-urm_abcd = TailRecipe(
-        'urm_abcd', Unary, size=1, ins=ABCD, outs=GPR,
-        when_prefixed=urm,
+# XX /r. Same as urm, but doesn't clobber FLAGS.
+urm_noflags = TailRecipe(
+        'urm_noflags', Unary, size=1, ins=GPR, outs=GPR,
+        clobbers_flags=False,
+        emit='''
+        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
+        modrm_rr(in_reg0, out_reg0, sink);
+        ''')
+
+# XX /r. Same as urm_noflags, but input limited to ABCD.
+urm_noflags_abcd = TailRecipe(
+        'urm_noflags_abcd', Unary, size=1, ins=ABCD, outs=GPR,
+        when_prefixed=urm_noflags,
        emit='''
        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
        modrm_rr(in_reg0, out_reg0, sink);
@@ -449,6 +465,7 @@ div = TailRecipe(
        'div', Ternary, size=1,
        ins=(GPR.rax, GPR.rdx, GPR), outs=(GPR.rax, GPR.rdx),
        emit='''
+        sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]);
        PUT_OP(bits, rex1(in_reg2), sink);
        modrm_r_bits(in_reg2, bits, sink);
        ''')
@@ -506,6 +523,17 @@ puid = TailRecipe(
        sink.put4(imm as u32);
        ''')

+# XX+rd id unary with bool immediate. Note no recipe predicate.
+puid_bool = TailRecipe(
+        'puid_bool', UnaryBool, size=4, ins=(), outs=GPR,
+        emit='''
+        // The destination register is encoded in the low bits of the opcode.
+        // No ModR/M.
+        PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+        let imm: u32 = if imm.into() { 1 } else { 0 };
+        sink.put4(imm);
+        ''')
+
 # XX+rd iq unary with 64-bit immediate.
 puiq = TailRecipe(
        'puiq', UnaryImm, size=8, ins=(), outs=GPR,
@@ -666,6 +694,9 @@ st = TailRecipe(
        instp=IsEqual(Store.offset, 0),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_rm(in_reg1, in_reg0, sink);
        ''')
@@ -678,6 +709,9 @@ st_abcd = TailRecipe(
        when_prefixed=st,
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_rm(in_reg1, in_reg0, sink);
        ''')
@@ -688,6 +722,9 @@ fst = TailRecipe(
        instp=IsEqual(Store.offset, 0),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_rm(in_reg1, in_reg0, sink);
        ''')
@@ -698,6 +735,9 @@ stDisp8 = TailRecipe(
        instp=IsSignedInt(Store.offset, 8),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_disp8(in_reg1, in_reg0, sink);
        let offset: i32 = offset.into();
@@ -709,6 +749,9 @@ stDisp8_abcd = TailRecipe(
        when_prefixed=stDisp8,
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_disp8(in_reg1, in_reg0, sink);
        let offset: i32 = offset.into();
@@ -719,6 +762,9 @@ fstDisp8 = TailRecipe(
        instp=IsSignedInt(Store.offset, 8),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_disp8(in_reg1, in_reg0, sink);
        let offset: i32 = offset.into();
@@ -730,6 +776,9 @@ stDisp32 = TailRecipe(
        'stDisp32', Store, size=5, ins=(GPR, GPR_DEREF_SAFE), outs=(),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_disp32(in_reg1, in_reg0, sink);
        let offset: i32 = offset.into();
@@ -740,6 +789,9 @@ stDisp32_abcd = TailRecipe(
        when_prefixed=stDisp32,
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_disp32(in_reg1, in_reg0, sink);
        let offset: i32 = offset.into();
@@ -749,6 +801,9 @@ fstDisp32 = TailRecipe(
        'fstDisp32', Store, size=5, ins=(FPR, GPR_DEREF_SAFE), outs=(),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_disp32(in_reg1, in_reg0, sink);
        let offset: i32 = offset.into();
@@ -756,8 +811,8 @@ fstDisp32 = TailRecipe(
        ''')

 # Unary spill with SIB and 32-bit displacement.
-spSib32 = TailRecipe(
-        'spSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
+spillSib32 = TailRecipe(
+        'spillSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
        clobbers_flags=False,
        emit='''
        let base = stk_base(out_stk0.base);
@@ -766,8 +821,10 @@ spSib32 = TailRecipe(
        sib_noindex(base, sink);
        sink.put4(out_stk0.offset as u32);
        ''')
-fspSib32 = TailRecipe(
-        'fspSib32', Unary, size=6, ins=FPR, outs=StackFPR32,
+
+# Like spillSib32, but spilling an FPR rather than a GPR.
+fspillSib32 = TailRecipe(
+        'fspillSib32', Unary, size=6, ins=FPR, outs=StackFPR32,
        clobbers_flags=False,
        emit='''
        let base = stk_base(out_stk0.base);
@@ -778,8 +835,8 @@ fspSib32 = TailRecipe(
        ''')

 # Regspill using RSP-relative addressing.
-rsp32 = TailRecipe(
-        'rsp32', RegSpill, size=6, ins=GPR, outs=(),
+regspill32 = TailRecipe(
+        'regspill32', RegSpill, size=6, ins=GPR, outs=(),
        clobbers_flags=False,
        emit='''
        let dst = StackRef::sp(dst, &func.stack_slots);
@@ -789,8 +846,10 @@ rsp32 = TailRecipe(
        sib_noindex(base, sink);
        sink.put4(dst.offset as u32);
        ''')
-frsp32 = TailRecipe(
-        'frsp32', RegSpill, size=6, ins=FPR, outs=(),
+
+# Like regspill32, but spilling an FPR rather than a GPR.
+fregspill32 = TailRecipe(
+        'fregspill32', RegSpill, size=6, ins=FPR, outs=(),
        clobbers_flags=False,
        emit='''
        let dst = StackRef::sp(dst, &func.stack_slots);
@@ -811,6 +870,9 @@ ld = TailRecipe(
        instp=IsEqual(Load.offset, 0),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
        modrm_rm(in_reg0, out_reg0, sink);
        ''')
@@ -821,6 +883,9 @@ fld = TailRecipe(
        instp=IsEqual(Load.offset, 0),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
        modrm_rm(in_reg0, out_reg0, sink);
        ''')
@@ -831,6 +896,9 @@ ldDisp8 = TailRecipe(
        instp=IsSignedInt(Load.offset, 8),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
        modrm_disp8(in_reg0, out_reg0, sink);
        let offset: i32 = offset.into();
@@ -843,6 +911,9 @@ fldDisp8 = TailRecipe(
        instp=IsSignedInt(Load.offset, 8),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
        modrm_disp8(in_reg0, out_reg0, sink);
        let offset: i32 = offset.into();
@@ -855,6 +926,9 @@ ldDisp32 = TailRecipe(
        instp=IsSignedInt(Load.offset, 32),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
        modrm_disp32(in_reg0, out_reg0, sink);
        let offset: i32 = offset.into();
@@ -867,6 +941,9 @@ fldDisp32 = TailRecipe(
        instp=IsSignedInt(Load.offset, 32),
        clobbers_flags=False,
        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
        modrm_disp32(in_reg0, out_reg0, sink);
        let offset: i32 = offset.into();
@@ -874,8 +951,8 @@ fldDisp32 = TailRecipe(
        ''')

 # Unary fill with SIB and 32-bit displacement.
-fiSib32 = TailRecipe(
-        'fiSib32', Unary, size=6, ins=StackGPR32, outs=GPR,
+fillSib32 = TailRecipe(
+        'fillSib32', Unary, size=6, ins=StackGPR32, outs=GPR,
        clobbers_flags=False,
        emit='''
        let base = stk_base(in_stk0.base);
@@ -884,8 +961,10 @@ fiSib32 = TailRecipe(
        sib_noindex(base, sink);
        sink.put4(in_stk0.offset as u32);
        ''')
-ffiSib32 = TailRecipe(
-        'ffiSib32', Unary, size=6, ins=StackFPR32, outs=FPR,
+
+# Like fillSib32, but targeting an FPR rather than a GPR.
+ffillSib32 = TailRecipe(
+        'ffillSib32', Unary, size=6, ins=StackFPR32, outs=FPR,
        clobbers_flags=False,
        emit='''
        let base = stk_base(in_stk0.base);
@@ -896,8 +975,8 @@ ffiSib32 = TailRecipe(
        ''')

 # Regfill with RSP-relative 32-bit displacement.
-rfi32 = TailRecipe(
-        'rfi32', RegFill, size=6, ins=StackGPR32, outs=(),
+regfill32 = TailRecipe(
+        'regfill32', RegFill, size=6, ins=StackGPR32, outs=(),
        clobbers_flags=False,
        emit='''
        let src = StackRef::sp(src, &func.stack_slots);
@@ -907,8 +986,10 @@ rfi32 = TailRecipe(
        sib_noindex(base, sink);
        sink.put4(src.offset as u32);
        ''')
-frfi32 = TailRecipe(
-        'frfi32', RegFill, size=6, ins=StackFPR32, outs=(),
+
+# Like regfill32, but targeting an FPR rather than a GPR.
+fregfill32 = TailRecipe(
+        'fregfill32', RegFill, size=6, ins=StackFPR32, outs=(),
        clobbers_flags=False,
        emit='''
        let src = StackRef::sp(src, &func.stack_slots);
@@ -977,7 +1058,7 @@ jmpd = TailRecipe(
        ''')

 brib = TailRecipe(
-        'brib', BranchInt, size=1, ins=FLAG.eflags, outs=(),
+        'brib', BranchInt, size=1, ins=FLAG.rflags, outs=(),
        branch_range=8,
        clobbers_flags=False,
        emit='''
@@ -986,7 +1067,7 @@ brib = TailRecipe(
        ''')

 brid = TailRecipe(
-        'brid', BranchInt, size=4, ins=FLAG.eflags, outs=(),
+        'brid', BranchInt, size=4, ins=FLAG.rflags, outs=(),
        branch_range=32,
        clobbers_flags=False,
        emit='''
@@ -995,7 +1076,7 @@ brid = TailRecipe(
        ''')

 brfb = TailRecipe(
-        'brfb', BranchFloat, size=1, ins=FLAG.eflags, outs=(),
+        'brfb', BranchFloat, size=1, ins=FLAG.rflags, outs=(),
        branch_range=8,
        clobbers_flags=False,
        instp=floatccs(BranchFloat),
@@ -1005,7 +1086,7 @@ brfb = TailRecipe(
        ''')

 brfd = TailRecipe(
-        'brfd', BranchFloat, size=4, ins=FLAG.eflags, outs=(),
+        'brfd', BranchFloat, size=4, ins=FLAG.rflags, outs=(),
        branch_range=32,
        clobbers_flags=False,
        instp=floatccs(BranchFloat),
@@ -1025,7 +1106,7 @@ brfd = TailRecipe(
 #

 seti = TailRecipe(
-        'seti', IntCond, size=1, ins=FLAG.eflags, outs=GPR,
+        'seti', IntCond, size=1, ins=FLAG.rflags, outs=GPR,
        requires_prefix=True,
        clobbers_flags=False,
        emit='''
@@ -1033,7 +1114,7 @@ seti = TailRecipe(
        modrm_r_bits(out_reg0, bits, sink);
        ''')
 seti_abcd = TailRecipe(
-        'seti_abcd', IntCond, size=1, ins=FLAG.eflags, outs=ABCD,
+        'seti_abcd', IntCond, size=1, ins=FLAG.rflags, outs=ABCD,
        when_prefixed=seti,
        clobbers_flags=False,
        emit='''
@@ -1042,7 +1123,7 @@ seti_abcd = TailRecipe(
        ''')

 setf = TailRecipe(
-        'setf', FloatCond, size=1, ins=FLAG.eflags, outs=GPR,
+        'setf', FloatCond, size=1, ins=FLAG.rflags, outs=GPR,
        requires_prefix=True,
        clobbers_flags=False,
        emit='''
@@ -1050,7 +1131,7 @@ setf = TailRecipe(
        modrm_r_bits(out_reg0, bits, sink);
        ''')
 setf_abcd = TailRecipe(
-        'setf_abcd', FloatCond, size=1, ins=FLAG.eflags, outs=ABCD,
+        'setf_abcd', FloatCond, size=1, ins=FLAG.rflags, outs=ABCD,
        when_prefixed=setf,
        clobbers_flags=False,
        emit='''
@@ -1064,7 +1145,7 @@ setf_abcd = TailRecipe(
 # 1 byte, modrm(r,r), is after the opcode
 #
 cmov = TailRecipe(
-        'cmov', IntSelect, size=1, ins=(FLAG.eflags, GPR, GPR), outs=2,
+        'cmov', IntSelect, size=1, ins=(FLAG.rflags, GPR, GPR), outs=2,
        requires_prefix=False,
        clobbers_flags=False,
        emit='''
@@ -1076,7 +1157,7 @@ cmov = TailRecipe(
 # Bit scan forwards and reverse
 #
 bsf_and_bsr = TailRecipe(
-        'bsf_and_bsr', Unary, size=1, ins=GPR, outs=(GPR, FLAG.eflags),
+        'bsf_and_bsr', Unary, size=1, ins=GPR, outs=(GPR, FLAG.rflags),
        requires_prefix=False,
        clobbers_flags=True,
        emit='''
@@ -1090,7 +1171,7 @@ bsf_and_bsr = TailRecipe(

 # XX /r, MR form. Compare two GPR registers and set flags.
 rcmp = TailRecipe(
-        'rcmp', Binary, size=1, ins=(GPR, GPR), outs=FLAG.eflags,
+        'rcmp', Binary, size=1, ins=(GPR, GPR), outs=FLAG.rflags,
        emit='''
        PUT_OP(bits, rex2(in_reg0, in_reg1), sink);
        modrm_rr(in_reg0, in_reg1, sink);
@@ -1098,7 +1179,7 @@ rcmp = TailRecipe(

 # XX /r, RM form. Compare two FPR registers and set flags.
 fcmp = TailRecipe(
-        'fcmp', Binary, size=1, ins=(FPR, FPR), outs=FLAG.eflags,
+        'fcmp', Binary, size=1, ins=(FPR, FPR), outs=FLAG.rflags,
        emit='''
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_rr(in_reg1, in_reg0, sink);
@@ -1106,7 +1187,7 @@ fcmp = TailRecipe(

 # XX /n, MI form with imm8.
 rcmpib = TailRecipe(
-        'rcmpib', BinaryImm, size=2, ins=GPR, outs=FLAG.eflags,
+        'rcmpib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags,
        instp=IsSignedInt(BinaryImm.imm, 8),
        emit='''
        PUT_OP(bits, rex1(in_reg0), sink);
@@ -1117,7 +1198,7 @@ rcmpib = TailRecipe(

 # XX /n, MI form with imm32.
 rcmpid = TailRecipe(
-        'rcmpid', BinaryImm, size=5, ins=GPR, outs=FLAG.eflags,
+        'rcmpid', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags,
        instp=IsSignedInt(BinaryImm.imm, 32),
        emit='''
        PUT_OP(bits, rex1(in_reg0), sink);
@@ -1128,7 +1209,7 @@ rcmpid = TailRecipe(

 # Same as rcmp, but second operand is the stack pointer.
 rcmp_sp = TailRecipe(
-        'rcmp_sp', Unary, size=1, ins=GPR, outs=FLAG.eflags,
+        'rcmp_sp', Unary, size=1, ins=GPR, outs=FLAG.rflags,
        emit='''
        PUT_OP(bits, rex2(in_reg0, RU::rsp.into()), sink);
        modrm_rr(in_reg0, RU::rsp.into(), sink);
@@ -1289,12 +1370,67 @@ icscc = TailRecipe(
        modrm_rr(out_reg0, 0, sink);
        ''')

+icsccib = TailRecipe(
+        'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
+        instp=IsSignedInt(IntCompareImm.imm, 8),
+        emit='''
+        // Comparison instruction.
+        PUT_OP(bits, rex1(in_reg0), sink);
+        modrm_r_bits(in_reg0, bits, sink);
+        let imm: i64 = imm.into();
+        sink.put1(imm as u8);
+        // `setCC` instruction, no REX.
+        use ir::condcodes::IntCC::*;
+        let setcc = match cond {
+            Equal => 0x94,
+            NotEqual => 0x95,
+            SignedLessThan => 0x9c,
+            SignedGreaterThanOrEqual => 0x9d,
+            SignedGreaterThan => 0x9f,
+            SignedLessThanOrEqual => 0x9e,
+            UnsignedLessThan => 0x92,
+            UnsignedGreaterThanOrEqual => 0x93,
+            UnsignedGreaterThan => 0x97,
+            UnsignedLessThanOrEqual => 0x96,
+        };
+        sink.put1(0x0f);
+        sink.put1(setcc);
+        modrm_rr(out_reg0, 0, sink);
+        ''')
+
+icsccid = TailRecipe(
+        'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
+        instp=IsSignedInt(IntCompareImm.imm, 32),
+        emit='''
+        // Comparison instruction.
+        PUT_OP(bits, rex1(in_reg0), sink);
+        modrm_r_bits(in_reg0, bits, sink);
+        let imm: i64 = imm.into();
+        sink.put4(imm as u32);
+        // `setCC` instruction, no REX.
+        use ir::condcodes::IntCC::*;
+        let setcc = match cond {
+            Equal => 0x94,
+            NotEqual => 0x95,
+            SignedLessThan => 0x9c,
+            SignedGreaterThanOrEqual => 0x9d,
+            SignedGreaterThan => 0x9f,
+            SignedLessThanOrEqual => 0x9e,
+            UnsignedLessThan => 0x92,
+            UnsignedGreaterThanOrEqual => 0x93,
+            UnsignedGreaterThan => 0x97,
+            UnsignedLessThanOrEqual => 0x96,
+        };
+        sink.put1(0x0f);
+        sink.put1(setcc);
+        modrm_rr(out_reg0, 0, sink);
+        ''')

 # Make a FloatCompare instruction predicate with the supported condition codes.

 # Same thing for floating point.
 #
-# The ucomiss/ucomisd instructions set the EFLAGS bits CF/PF/CF like this:
+# The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
 #
 #    ZPC OSA
 # UN 111 000
--- a/lib/cretonne/meta/isa/intel/registers.py
+++ b/lib/cretonne/meta/isa/intel/registers.py
@@ -43,7 +43,7 @@ FlagRegs = RegBank(
        'Flag registers',
        units=1,
        pressure_tracking=False,
-        names=['eflags'])
+        names=['rflags'])

 GPR = RegClass(IntRegs)
 # Certain types of deref encodings cannot be used with all registers.
--- a/lib/cretonne/meta/srcgen.py
+++ b/lib/cretonne/meta/srcgen.py
@@ -8,9 +8,10 @@ source code.
 from __future__ import absolute_import
 import sys
 import os
+from collections import OrderedDict

 try:
-    from typing import Any, List  # noqa
+    from typing import Any, List, Set, Tuple  # noqa
 except ImportError:
    pass

@@ -146,6 +147,52 @@ class Formatter(object):
        for l in parse_multiline(s):
            self.line('/// ' + l if l else '///')

+    def match(self, m):
+        # type: (Match) -> None
+        """
+        Add a match expression.
+
+        Example:
+
+            >>> f = Formatter()
+            >>> m = Match('x')
+            >>> m.arm('Orange', ['a', 'b'], 'some body')
+            >>> m.arm('Yellow', ['a', 'b'], 'some body')
+            >>> m.arm('Green', ['a', 'b'], 'different body')
+            >>> m.arm('Blue', ['x', 'y'], 'some body')
+            >>> f.match(m)
+            >>> f.writelines()
+            match x {
+                Orange { a, b } |
+                Yellow { a, b } => {
+                    some body
+                }
+                Green { a, b } => {
+                    different body
+                }
+                Blue { x, y } => {
+                    some body
+                }
+            }
+
+        """
+        with self.indented('match {} {{'.format(m.expr), '}'):
+            for (fields, body), names in m.arms.items():
+                with self.indented('', '}'):
+                    names_left = len(names)
+                    for name in names.keys():
+                        fields_str = ', '.join(fields)
+                        if len(fields) != 0:
+                            fields_str = '{{ {} }} '.format(fields_str)
+                        names_left -= 1
+                        if names_left > 0:
+                            suffix = '|'
+                        else:
+                            suffix = '=> {'
+                        self.outdented_line(name + ' ' + fields_str + suffix)
+                        if names_left == 0:
+                            self.multi_line(body)
+

 def _indent(s):
    # type: (str) -> int
@@ -195,3 +242,36 @@ def parse_multiline(s):
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    return trimmed
+
+
+class Match(object):
+    """
+    Match formatting class.
+
+    Match objects collect all the information needed to emit a Rust `match`
+    expression, merging arms whose fields and bodies are identical.
+
+    Example:
+
+        >>> m = Match('x')
+        >>> m.arm('Orange', ['a', 'b'], 'some body')
+        >>> m.arm('Yellow', ['a', 'b'], 'some body')
+        >>> m.arm('Green', ['a', 'b'], 'different body')
+        >>> m.arm('Blue', ['x', 'y'], 'some body')
+        >>> assert(len(m.arms) == 3)
+
+    Note that this class is ignorant of Rust types, and considers two fields
+    with the same name to be equivalent.
+    """
+
+    def __init__(self, expr):
+        # type: (str) -> None
+        self.expr = expr
+        self.arms = OrderedDict()  # type: OrderedDict[Tuple[Tuple[str, ...], str], OrderedDict[str, None]]  # noqa
+
+    def arm(self, name, fields, body):
+        # type: (str, List[str], str) -> None
+        key = (tuple(fields), body)
+        if key not in self.arms:
+            self.arms[key] = OrderedDict()
+        self.arms[key][name] = None
--- a/lib/cretonne/meta/test_gen_legalizer.py
+++ b/lib/cretonne/meta/test_gen_legalizer.py
@@ -148,9 +148,9 @@ class TestRuntimeChecks(TestCase):
                self.v5 << vselect(self.v1, self.v3, self.v4),
        )
        x = XForm(r, r)
-        tv2_exp = 'Some({}).map(|t: Type| -> t.as_bool())'\
+        tv2_exp = 'Some({}).map(|t: ir::Type| t.as_bool())'\
            .format(self.v2.get_typevar().name)
-        tv3_exp = 'Some({}).map(|t: Type| -> t.as_bool())'\
+        tv3_exp = 'Some({}).map(|t: ir::Type| t.as_bool())'\
            .format(self.v3.get_typevar().name)

        self.check_yo_check(
--- a/lib/cretonne/src/abi.rs
+++ b/lib/cretonne/src/abi.rs
@@ -3,7 +3,7 @@
 //! This module provides functions and data structures that are useful for implementing the
 //! `TargetIsa::legalize_signature()` method.

-use ir::{ArgumentLoc, AbiParam, ArgumentExtension, Type};
+use ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
 use std::cmp::Ordering;
 use std::vec::Vec;

@@ -186,8 +186,8 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use ir::types;
    use ir::AbiParam;
+    use ir::types;

    #[test]
    fn legalize() {
--- a/lib/cretonne/src/bforest/map.rs
+++ b/lib/cretonne/src/bforest/map.rs
@@ -1,8 +1,8 @@
 //! Forest of maps.

+use super::{Comparator, Forest, Node, NodeData, NodePool, Path, INNER_SIZE};
 use packed_option::PackedOption;
 use std::marker::PhantomData;
-use super::{INNER_SIZE, Comparator, Forest, NodePool, Node, NodeData, Path};

 /// Tag type defining forest types for a map.
 struct MapTypes<K, V, C>(PhantomData<(K, V, C)>);
@@ -424,10 +424,10 @@ where

 #[cfg(test)]
 mod test {
+    use super::super::NodeData;
+    use super::*;
    use std::mem;
    use std::vec::Vec;
-    use super::*;
-    use super::super::NodeData;

    #[test]
    fn node_size() {
--- a/lib/cretonne/src/bforest/mod.rs
+++ b/lib/cretonne/src/bforest/mod.rs
@@ -22,8 +22,8 @@ mod path;
 mod pool;
 mod set;

-pub use self::map::{MapForest, Map, MapCursor, MapIter};
-pub use self::set::{SetForest, Set, SetCursor, SetIter};
+pub use self::map::{Map, MapCursor, MapForest, MapIter};
+pub use self::set::{Set, SetCursor, SetForest, SetIter};

 use self::node::NodeData;
 use self::path::Path;
--- a/lib/cretonne/src/bforest/node.rs
+++ b/lib/cretonne/src/bforest/node.rs
@@ -1,8 +1,8 @@
 //! B+-tree nodes.

+use super::{slice_insert, slice_shift, Forest, Node, SetValue, INNER_SIZE};
 use std::borrow::{Borrow, BorrowMut};
 use std::fmt;
-use super::{Forest, Node, INNER_SIZE, SetValue, slice_insert, slice_shift};

 /// B+-tree node.
 ///
@@ -579,9 +579,9 @@ where

 #[cfg(test)]
 mod test {
+    use super::*;
    use std::mem;
    use std::string::ToString;
-    use super::*;

    // Forest impl for a set implementation.
    struct TF();
--- a/lib/cretonne/src/bforest/path.rs
+++ b/lib/cretonne/src/bforest/path.rs
@@ -1,9 +1,9 @@
 //! A path from the root of a B+-tree to a leaf node.

+use super::node::Removed;
+use super::{slice_insert, slice_shift, Comparator, Forest, Node, NodeData, NodePool, MAX_PATH};
 use std::borrow::Borrow;
 use std::marker::PhantomData;
-use super::{Forest, Node, NodeData, NodePool, MAX_PATH, Comparator, slice_insert, slice_shift};
-use super::node::Removed;

 #[cfg(test)]
 use std::fmt;
@@ -55,8 +55,8 @@ impl<F: Forest> Path<F> {
        for level in 0.. {
            self.size = level + 1;
            self.node[level] = node;
-            match &pool[node] {
-                &NodeData::Inner { size, keys, tree } => {
+            match pool[node] {
+                NodeData::Inner { size, keys, tree } => {
                    // Invariant: `tree[i]` contains keys smaller than
                    // `keys[i]`, greater or equal to `keys[i-1]`.
                    let i = match comp.search(key, &keys[0..size.into()]) {
@@ -68,7 +68,7 @@ impl<F: Forest> Path<F> {
                    self.entry[level] = i as u8;
                    node = tree[i];
                }
-                &NodeData::Leaf { size, keys, vals } => {
+                NodeData::Leaf { size, keys, vals } => {
                    // For a leaf we want either the found key or an insert position.
                    return match comp.search(key, &keys.borrow()[0..size.into()]) {
                        Ok(i) => {
@@ -81,7 +81,7 @@ impl<F: Forest> Path<F> {
                        }
                    };
                }
-                &NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
+                NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
            }
        }
        unreachable!();
@@ -94,10 +94,10 @@ impl<F: Forest> Path<F> {
            self.size = level + 1;
            self.node[level] = node;
            self.entry[level] = 0;
-            match &pool[node] {
-                &NodeData::Inner { tree, .. } => node = tree[0],
-                &NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]),
-                &NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
+            match pool[node] {
+                NodeData::Inner { tree, .. } => node = tree[0],
+                NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]),
+                NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
            }
        }
        unreachable!();
@@ -205,17 +205,17 @@ impl<F: Forest> Path<F> {
        let mut node = root;
        for l in level.. {
            self.node[l] = node;
-            match &pool[node] {
-                &NodeData::Inner { size, ref tree, .. } => {
+            match pool[node] {
+                NodeData::Inner { size, ref tree, .. } => {
                    self.entry[l] = size;
                    node = tree[usize::from(size)];
                }
-                &NodeData::Leaf { size, .. } => {
+                NodeData::Leaf { size, .. } => {
                    self.entry[l] = size - 1;
                    self.size = l + 1;
                    break;
                }
-                &NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
+                NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
            }
        }
        node
@@ -405,8 +405,8 @@ impl<F: Forest> Path<F> {
        let crit_key = pool[self.leaf_node()].leaf_crit_key();
        let crit_node = self.node[crit_level];

-        match &mut pool[crit_node] {
-            &mut NodeData::Inner { size, ref mut keys, .. } => {
+        match pool[crit_node] {
+            NodeData::Inner { size, ref mut keys, .. } => {
                debug_assert!(crit_kidx < size);
                keys[usize::from(crit_kidx)] = crit_key;
            }
@@ -414,7 +414,6 @@ impl<F: Forest> Path<F> {
        }
    }

-
    /// Given that the current leaf node is in an unhealthy (underflowed or even empty) status,
    /// balance it with sibling nodes.
    ///
@@ -437,7 +436,7 @@ impl<F: Forest> Path<F> {

        // Discard the root node if it has shrunk to a single sub-tree.
        let mut ns = 0;
-        while let &NodeData::Inner { size: 0, ref tree, .. } = &pool[self.node[ns]] {
+        while let NodeData::Inner { size: 0, ref tree, .. } = pool[self.node[ns]] {
            ns += 1;
            self.node[ns] = tree[0];
        }
@@ -529,14 +528,12 @@ impl<F: Forest> Path<F> {
            // current entry[level] was one off the end of the node, it will now point at a proper
            // entry.
            debug_assert!(usize::from(self.entry[level]) < pool[self.node[level]].entries());
-        } else {
+        } else if usize::from(self.entry[level]) >= pool[self.node[level]].entries() {
            // There's no right sibling at this level, so the node can't be rebalanced.
            // Check if we are in an off-the-end position.
-            if usize::from(self.entry[level]) >= pool[self.node[level]].entries() {
            self.size = 0;
        }
    }
-    }

    /// The current node at `level` has become empty.
    ///
@@ -581,8 +578,8 @@ impl<F: Forest> Path<F> {
    ///
    /// Returns `None` if the current node is a right-most node so no right sibling exists.
    fn right_sibling_branch_level(&self, level: usize, pool: &NodePool<F>) -> Option<usize> {
-        (0..level).rposition(|l| match &pool[self.node[l]] {
-            &NodeData::Inner { size, .. } => self.entry[l] < size,
+        (0..level).rposition(|l| match pool[self.node[l]] {
+            NodeData::Inner { size, .. } => self.entry[l] < size,
            _ => panic!("Expected inner node"),
        })
    }
@@ -622,8 +619,8 @@ impl<F: Forest> Path<F> {
        let bl = self.right_sibling_branch_level(level, pool).expect(
            "No right sibling exists",
        );
-        match &mut pool[self.node[bl]] {
-            &mut NodeData::Inner { ref mut keys, .. } => {
+        match pool[self.node[bl]] {
+            NodeData::Inner { ref mut keys, .. } => {
                keys[usize::from(self.entry[bl])] = crit_key;
            }
            _ => panic!("Expected inner node"),
@@ -647,8 +644,8 @@ impl<F: Forest> Path<F> {
    /// Check the internal consistency of this path.
    pub fn verify(&self, pool: &NodePool<F>) {
        for level in 0..self.size {
-            match &pool[self.node[level]] {
-                &NodeData::Inner { size, tree, .. } => {
+            match pool[self.node[level]] {
+                NodeData::Inner { size, tree, .. } => {
                    assert!(
                        level < self.size - 1,
                        "Expected leaf node at level {}",
@@ -668,7 +665,7 @@ impl<F: Forest> Path<F> {
                        level
                    );
                }
-                &NodeData::Leaf { size, .. } => {
+                NodeData::Leaf { size, .. } => {
                    assert_eq!(level, self.size - 1, "Expected inner node");
                    assert!(
                        self.entry[level] <= size,
@@ -677,7 +674,7 @@ impl<F: Forest> Path<F> {
                        size,
                    );
                }
-                &NodeData::Free { .. } => {
+                NodeData::Free { .. } => {
                    panic!("Free {} in path", self.node[level]);
                }
            }
@@ -702,9 +699,9 @@ impl<F: Forest> fmt::Display for Path<F> {

 #[cfg(test)]
 mod test {
-    use std::cmp::Ordering;
+    use super::super::{Forest, NodeData, NodePool};
    use super::*;
-    use super::super::{Forest, NodePool, NodeData};
+    use std::cmp::Ordering;

    struct TC();

--- a/lib/cretonne/src/bforest/pool.rs
+++ b/lib/cretonne/src/bforest/pool.rs
@@ -1,8 +1,8 @@
 //! B+-tree node pool.

+use super::{Forest, Node, NodeData};
 use entity::PrimaryMap;
 use std::ops::{Index, IndexMut};
-use super::{Forest, Node, NodeData};

 /// A pool of nodes, including a free list.
 pub(super) struct NodePool<F: Forest> {
@@ -57,6 +57,7 @@ impl<F: Forest> NodePool<F> {
    pub fn free_tree(&mut self, node: Node) {
        if let NodeData::Inner { size, tree, .. } = self[node] {
            // Note that we have to capture `tree` by value to avoid borrow checker trouble.
+            #[cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
            for i in 0..usize::from(size + 1) {
                // Recursively free sub-trees. This recursion can never be deeper than `MAX_PATH`,
                // and since most trees have less than a handful of nodes, it is worthwhile to
@@ -76,11 +77,11 @@ impl<F: Forest> NodePool<F> {
        NodeData<F>: ::std::fmt::Display,
        F::Key: ::std::fmt::Display,
    {
+        use super::Comparator;
+        use entity::SparseSet;
        use std::borrow::Borrow;
        use std::cmp::Ordering;
        use std::vec::Vec;
-        use super::Comparator;
-        use entity::SparseSet;

        // The root node can't be an inner node with just a single sub-tree. It should have been
        // pruned.
@@ -105,8 +106,8 @@ impl<F: Forest> NodePool<F> {
            );
            let mut lower = lkey;

-            match &self[node] {
-                &NodeData::Inner { size, keys, tree } => {
+            match self[node] {
+                NodeData::Inner { size, keys, tree } => {
                    let size = size as usize;
                    let capacity = tree.len();
                    let keys = &keys[0..size];
@@ -148,7 +149,7 @@ impl<F: Forest> NodePool<F> {
                        lower = upper;
                    }
                }
-                &NodeData::Leaf { size, keys, .. } => {
+                NodeData::Leaf { size, keys, .. } => {
                    let size = size as usize;
                    let capacity = keys.borrow().len();
                    let keys = &keys.borrow()[0..size];
@@ -191,7 +192,7 @@ impl<F: Forest> NodePool<F> {
                        lower = upper;
                    }
                }
-                &NodeData::Free { .. } => panic!("Free {} reached", node),
+                NodeData::Free { .. } => panic!("Free {} reached", node),
            }
        }
    }
--- a/lib/cretonne/src/bforest/set.rs
+++ b/lib/cretonne/src/bforest/set.rs
@@ -1,8 +1,8 @@
 //! Forest of sets.

+use super::{Comparator, Forest, Node, NodeData, NodePool, Path, SetValue, INNER_SIZE};
 use packed_option::PackedOption;
 use std::marker::PhantomData;
-use super::{INNER_SIZE, Comparator, Forest, NodePool, Node, NodeData, Path, SetValue};

 /// Tag type defining forest types for a set.
 struct SetTypes<K, C>(PhantomData<(K, C)>);
@@ -351,10 +351,10 @@ where

 #[cfg(test)]
 mod test {
+    use super::super::NodeData;
+    use super::*;
    use std::mem;
    use std::vec::Vec;
-    use super::*;
-    use super::super::NodeData;

    #[test]
    fn node_size() {
--- a/lib/cretonne/src/binemit/memorysink.rs
+++ b/lib/cretonne/src/binemit/memorysink.rs
@@ -14,13 +14,13 @@
 //! relocations to a `RelocSink` trait object. Relocations are less frequent than the
 //! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe.

-use ir::{ExternalName, JumpTable};
-use super::{CodeSink, CodeOffset, Reloc, Addend};
+use super::{Addend, CodeOffset, CodeSink, Reloc};
+use ir::{ExternalName, JumpTable, SourceLoc, TrapCode};
 use std::ptr::write_unaligned;

 /// A `CodeSink` that writes binary machine code directly into memory.
 ///
-/// A `MemoryCodeSink` object should be used when emitting a Cretonne IL function into executable
+/// A `MemoryCodeSink` object should be used when emitting a Cretonne IR function into executable
 /// memory. It writes machine code directly to a raw pointer without any bounds checking, so make
 /// sure to allocate enough memory for the whole function. The number of bytes required is returned
 /// by the `Context::compile()` function.
@@ -33,15 +33,21 @@ pub struct MemoryCodeSink<'a> {
    data: *mut u8,
    offset: isize,
    relocs: &'a mut RelocSink,
+    traps: &'a mut TrapSink,
 }

 impl<'a> MemoryCodeSink<'a> {
    /// Create a new memory code sink that writes a function to the memory pointed to by `data`.
-    pub fn new(data: *mut u8, relocs: &mut RelocSink) -> MemoryCodeSink {
+    pub fn new<'sink>(
+        data: *mut u8,
+        relocs: &'sink mut RelocSink,
+        traps: &'sink mut TrapSink,
+    ) -> MemoryCodeSink<'sink> {
        MemoryCodeSink {
            data,
            offset: 0,
            relocs,
+            traps,
        }
    }
 }
@@ -58,6 +64,12 @@ pub trait RelocSink {
    fn reloc_jt(&mut self, CodeOffset, Reloc, JumpTable);
 }

+/// A trait for receiving trap codes and offsets.
+pub trait TrapSink {
+    /// Add trap information for a specific offset.
+    fn trap(&mut self, CodeOffset, SourceLoc, TrapCode);
+}
+
 impl<'a> CodeSink for MemoryCodeSink<'a> {
    fn offset(&self) -> CodeOffset {
        self.offset as CodeOffset
@@ -105,4 +117,9 @@ impl<'a> CodeSink for MemoryCodeSink<'a> {
        let ofs = self.offset();
        self.relocs.reloc_jt(ofs, rel, jt);
    }
+
+    fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) {
+        let ofs = self.offset();
+        self.traps.trap(ofs, srcloc, code);
+    }
 }
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`doc-valid-idents = [ "WebAssembly", "NaN", "SetCC" ]`