Merge remote-tracking branch 'origin/master' into no_std

2018-03-30 15:14:30 -07:00
parent fc7b0a7e51 b523b69c16
commit 07693048f0
260 changed files with 4509 additions and 5074 deletions
--- a/cranelift/Cargo.toml
+++ b/cranelift/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "cretonne-tools"
 authors = ["The Cretonne Project Developers"]
-version = "0.3.4"
+version = "0.4.1"
 description = "Binaries for testing the Cretonne library"
 license = "Apache-2.0"
 documentation = "https://cretonne.readthedocs.io/"
@@ -13,18 +13,18 @@ name = "cton-util"
 path = "src/cton-util.rs"

 [dependencies]
-cretonne = { path = "lib/cretonne", version = "0.3.4" }
-cretonne-reader = { path = "lib/reader", version = "0.3.4" }
-cretonne-frontend = { path = "lib/frontend", version = "0.3.4" }
-cretonne-wasm = { path = "lib/wasm", version = "0.3.4" }
-cretonne-native = { path = "lib/native", version = "0.3.4" }
-filecheck = { path = "lib/filecheck" }
+cretonne = { path = "lib/cretonne", version = "0.4.1" }
+cretonne-reader = { path = "lib/reader", version = "0.4.1" }
+cretonne-frontend = { path = "lib/frontend", version = "0.4.1" }
+cretonne-wasm = { path = "lib/wasm", version = "0.4.1" }
+cretonne-native = { path = "lib/native", version = "0.4.1" }
+cretonne-filetests = { path = "lib/filetests", version = "0.4.1" }
+filecheck = "0.2.1"
 docopt = "0.8.0"
 serde = "1.0.8"
 serde_derive = "1.0.8"
-num_cpus = "1.5.1"
-tempdir="0.3.5"
-term = "0.5"
+tempdir = "0.3.5"
+term = "0.5.1"

 [workspace]

--- a/cranelift/clippy.toml
+++ b/cranelift/clippy.toml
@@ -0,0 +1 @@
+doc-valid-idents = [ "WebAssembly", "NaN", "SetCC" ]
--- a/cranelift/docs/callex.cton
+++ b/cranelift/docs/callex.cton
@@ -1,6 +1,6 @@
 test verifier

-function %gcd(i32 uext, i32 uext) -> i32 uext native {
+function %gcd(i32 uext, i32 uext) -> i32 uext system_v {
    fn1 = function %divmod(i32 uext, i32 uext) -> i32 uext, i32 uext

 ebb1(v1: i32, v2: i32):
--- a/cranelift/docs/compare-llvm.rst
+++ b/cranelift/docs/compare-llvm.rst
@@ -16,8 +16,8 @@ highlighting some of the differences and similarities. Both projects:
 - Use an ISA-agnostic input language in order to mostly abstract away the
  differences between target instruction set architectures.
 - Depend extensively on SSA form.
-- Have both textual and in-memory forms of their primary intermediate language.
-  (LLVM also has a binary bitcode format; Cretonne doesn't.)
+- Have both textual and in-memory forms of their primary intermediate
+  representation. (LLVM also has a binary bitcode format; Cretonne doesn't.)
 - Can target multiple ISAs.
 - Can cross-compile by default without rebuilding the code generator.

@@ -41,8 +41,8 @@ LLVM uses multiple intermediate representations as it translates a program to
 binary machine code:

 `LLVM IR <https://llvm.org/docs/LangRef.html>`_
-    This is the primary intermediate language which has textual, binary, and
-    in-memory representations. It serves two main purposes:
+    This is the primary intermediate representation which has textual, binary, and
+    in-memory forms. It serves two main purposes:

    - An ISA-agnostic, stable(ish) input language that front ends can generate
      easily.
@@ -89,9 +89,9 @@ representation. Some target ISAs have a fast instruction selector that can
 translate simple code directly to MachineInstrs, bypassing SelectionDAG when
 possible.

-:doc:`Cretonne <langref>` uses a single intermediate language to cover these
-levels of abstraction. This is possible in part because of Cretonne's smaller
-scope.
+:doc:`Cretonne <langref>` uses a single intermediate representation to cover
+these levels of abstraction. This is possible in part because of Cretonne's
+smaller scope.

 - Cretonne does not provide assemblers and disassemblers, so it is not
  necessary to be able to represent every weird instruction in an ISA. Only
@@ -102,7 +102,7 @@ scope.
 - SSA form is preserved throughout. After register allocation, each SSA value
  is annotated with an assigned ISA register or stack slot.

-The Cretonne intermediate language is similar to LLVM IR, but at a slightly
+The Cretonne intermediate representation is similar to LLVM IR, but at a slightly
 lower level of abstraction.

 Program structure
@@ -112,12 +112,12 @@ In LLVM IR, the largest representable unit is the *module* which corresponds
 more or less to a C translation unit. It is a collection of functions and
 global variables that may contain references to external symbols too.

-In Cretonne IL, the largest representable unit is the *function*. This is so
+In Cretonne IR, the largest representable unit is the *function*. This is so
 that functions can easily be compiled in parallel without worrying about
 references to shared data structures. Cretonne does not have any
 inter-procedural optimizations like inlining.

-An LLVM IR function is a graph of *basic blocks*. A Cretonne IL function is a
+An LLVM IR function is a graph of *basic blocks*. A Cretonne IR function is a
 graph of *extended basic blocks* that may contain internal branch instructions.
 The main difference is that an LLVM conditional branch instruction has two
 target basic blocks---a true and a false edge. A Cretonne branch instruction
--- a/cranelift/docs/cton_domain.py
+++ b/cranelift/docs/cton_domain.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Sphinx domain for documenting compiler intermediate languages.
+# Sphinx domain for documenting compiler intermediate representations.
 #
 # This defines a 'cton' Sphinx domain with the following directives and roles:
 #
@@ -29,10 +29,10 @@ import sphinx.ext.autodoc

 class CtonObject(ObjectDescription):
    """
-    Any kind of Cretonne IL object.
+    Any kind of Cretonne IR object.

    This is a shared base class for the different kinds of indexable objects
-    in the Cretonne IL reference.
+    in the Cretonne IR reference.
    """
    option_spec = {
        'noindex': directives.flag,
@@ -98,7 +98,7 @@ def parse_type(name, signode):


 class CtonType(CtonObject):
-    """A Cretonne IL type description."""
+    """A Cretonne IR type description."""

    def handle_signature(self, sig, signode):
        """
@@ -112,7 +112,7 @@ class CtonType(CtonObject):
        return name

    def get_index_text(self, name):
-        return name + ' (IL type)'
+        return name + ' (IR type)'


 sep_equal = re.compile('\s*=\s*')
@@ -127,7 +127,7 @@ def parse_params(s, signode):


 class CtonInst(CtonObject):
-    """A Cretonne IL instruction."""
+    """A Cretonne IR instruction."""

    doc_field_types = [
        TypedField('argument', label=l_('Arguments'),
@@ -176,11 +176,11 @@ class CtonInst(CtonObject):


 class CtonInstGroup(CtonObject):
-    """A Cretonne IL instruction group."""
+    """A Cretonne IR instruction group."""


 class CretonneDomain(Domain):
-    """Cretonne domain for intermediate language objects."""
+    """Cretonne domain for IR objects."""
    name = 'cton'
    label = 'Cretonne'

--- a/cranelift/docs/example.cton
+++ b/cranelift/docs/example.cton
@@ -1,6 +1,6 @@
 test verifier

-function %average(i32, i32) -> f32 native {
+function %average(i32, i32) -> f32 system_v {
    ss1 = explicit_slot 8         ; Stack slot for ``sum``.

 ebb1(v1: i32, v2: i32):
--- a/cranelift/docs/langref.rst
+++ b/cranelift/docs/langref.rst
@@ -5,19 +5,19 @@ Cretonne Language Reference
 .. default-domain:: cton
 .. highlight:: cton

-The Cretonne intermediate language (:term:`IL`) has two equivalent
-representations: an *in-memory data structure* that the code generator library
-is using, and a *text format* which is used for test cases and debug output.
-Files containing Cretonne textual IL have the ``.cton`` filename extension.
+The Cretonne intermediate representation (:term:`IR`) has two primary forms:
+an *in-memory data structure* that the code generator library uses, and a
+*text format* which is used for test cases and debug output.
+Files containing Cretonne textual IR have the ``.cton`` filename extension.

-This reference uses the text format to describe IL semantics but glosses over
+This reference uses the text format to describe IR semantics but glosses over
 the finer details of the lexical and syntactic structure of the format.


 Overall structure
 =================

-Cretonne compiles functions independently. A ``.cton`` IL file may contain
+Cretonne compiles functions independently. A ``.cton`` IR file may contain
 multiple functions, and the programmatic API can create multiple function
 handles at the same time, but the functions don't share any data or reference
 each other directly.
@@ -27,7 +27,7 @@ This is a simple C function that computes the average of an array of floats:
 .. literalinclude:: example.c
    :language: c

-Here is the same function compiled into Cretonne IL:
+Here is the same function compiled into Cretonne IR:

 .. literalinclude:: example.cton
    :language: cton
@@ -77,7 +77,7 @@ variable value for the next iteration.

 The `cton_frontend` crate contains utilities for translating from programs
 containing multiple assignments to the same variables into SSA form for
-Cretonne :term:`IL`.
+Cretonne :term:`IR`.

 Such variables can also be presented to Cretonne as :term:`stack slot`\s.
 Stack slots are accessed with the :inst:`stack_store` and :inst:`stack_load`
@@ -303,7 +303,7 @@ indicate the different kinds of immediate operands on an instruction.
    A floating point condition code. See the :inst:`fcmp` instruction for details.

 The two IEEE floating point immediate types :type:`ieee32` and :type:`ieee64`
-are displayed as hexadecimal floating point literals in the textual :term:`IL`
+are displayed as hexadecimal floating point literals in the textual :term:`IR`
 format. Decimal floating point literals are not allowed because some computer
 systems can round differently when converting to binary. The hexadecimal
 floating point format is mostly the same as the one used by C99, but extended
@@ -400,11 +400,11 @@ convention:
    param        : type [paramext] [paramspecial]
    paramext     : "uext" | "sext"
    paramspecial : "sret" | "link" | "fp" | "csr" | "vmctx"
-    callconv     : "native" | "spiderwasm"
+    callconv     : "system_v" | "spiderwasm"

 Parameters and return values have flags whose meaning is mostly target
-dependent. They make it possible to call native functions on the target
-platform. When calling other Cretonne functions, the flags are not necessary.
+dependent. These flags support interfacing with code produced by other
+compilers.

 Functions that are called directly must be declared in the :term:`function
 preamble`:
@@ -563,7 +563,7 @@ runtime data structures.
    alignment for storing a pointer.

    Chains of ``deref`` global variables are possible, but cycles are not
-    allowed. They will be caught by the IL verifier.
+    allowed. They will be caught by the IR verifier.

    :arg BaseGV: Global variable containing the base pointer.
    :arg Offset: Byte offset from the loaded base pointer to the global
@@ -654,6 +654,11 @@ trap when accessed.
            address space reserved for the heap, not including the guard pages.
    :arg GuardBytes: Size of the guard pages in bytes.

+When the base is a global variable, it must be :term:`accessible` and naturally
+aligned for a pointer value.
+
+The ``reserved_reg`` option is not yet implemented.
+
 Dynamic heaps
 ~~~~~~~~~~~~~

@@ -672,6 +677,11 @@ is resized. The bound of a dynamic heap is stored in a global variable.
    :arg BoundGV: Global variable containing the current heap bound in bytes.
    :arg GuardBytes: Size of the guard pages in bytes.

+When the base is a global variable, it must be :term:`accessible` and naturally
+aligned for a pointer value.
+
+The ``reserved_reg`` option is not yet implemented.
+
 Heap examples
 ~~~~~~~~~~~~~

@@ -1144,19 +1154,11 @@ Glossary
        The extended basic blocks which contain all the executable code in a
        function. The function body follows the function preamble.

-    intermediate language
-    IL
-        The language used to describe functions to Cretonne. This reference
-        describes the syntax and semantics of the Cretonne IL. The IL has two
-        forms: Textual and an in-memory intermediate representation
-        (:term:`IR`).
-
    intermediate representation
    IR
-        The in-memory representation of :term:`IL`. The data structures
-        Cretonne uses to represent a program internally are called the
-        intermediate representation. Cretonne's IR can be converted to text
-        losslessly.
+        The language used to describe functions to Cretonne. This reference
+        describes the syntax and semantics of Cretonne IR. The IR has two
+        forms: a text format and an in-memory data structure.

    stack slot
        A fixed size memory allocation in the current function's activation
--- a/cranelift/docs/testing.rst
+++ b/cranelift/docs/testing.rst
@@ -89,7 +89,7 @@ easier to provide substantial input functions for the compiler tests.

 File tests are :file:`*.cton` files in the :file:`filetests/` directory
 hierarchy. Each file has a header describing what to test followed by a number
-of input functions in the :doc:`Cretonne textual intermediate language
+of input functions in the :doc:`Cretonne textual intermediate representation
 <langref>`:

 .. productionlist::
@@ -136,13 +136,15 @@ This example will run the legalizer test twice. Both runs will have
 ``opt_level=best``, but they will have different ``is_64bit`` settings. The 32-bit
 run will also have the RISC-V specific flag ``supports_m`` disabled.

+The filetests are run automatically as part of ``cargo test``, and they can
+also be run manually with the ``cton-util test`` command.
+
 Filecheck
 ---------

 Many of the test commands described below use *filecheck* to verify their
 output. Filecheck is a Rust implementation of the LLVM tool of the same name.
-See the :file:`lib/filecheck` `documentation <https://docs.rs/filecheck/>`_ for
-details of its syntax.
+See the `documentation <https://docs.rs/filecheck/>`_ for details of its syntax.

 Comments in :file:`.cton` files are associated with the entity they follow.
 This typically means an instruction or the whole function. Those tests that
@@ -164,7 +166,7 @@ Cretonne's tests don't need this.
 ----------

 This is one of the simplest file tests, used for testing the conversion to and
-from textual IL. The ``test cat`` command simply parses each function and
+from textual IR. The ``test cat`` command simply parses each function and
 converts it back to text again. The text of each function is then matched
 against the associated filecheck directives.

@@ -186,7 +188,7 @@ Example::
 `test verifier`
 ---------------

-Run each function through the IL verifier and check that it produces the
+Run each function through the IR verifier and check that it produces the
 expected error messages.

 Expected error messages are indicated with an ``error:`` directive *on the
@@ -324,6 +326,38 @@ Test the simple GVN pass.
 The simple GVN pass is run on each function, and then results are run
 through filecheck.

+`test licm`
+-----------
+
+Test the loop-invariant code motion (LICM) pass.
+
+The LICM pass is run on each function, and then results are run
+through filecheck.
+
+`test dce`
+----------
+
+Test the dead code elimination (DCE) pass.
+
+The DCE pass is run on each function, and then results are run
+through filecheck.
+
+`test preopt`
+-------------
+
+Test the preopt pass.
+
+The preopt pass is run on each function, and then results are run
+through filecheck.
+
+`test postopt`
+--------------
+
+Test the postopt pass.
+
+The postopt pass is run on each function, and then results are run
+through filecheck.
+
 `test compile`
 --------------

@@ -333,4 +367,4 @@ Each function is passed through the full ``Context::compile()`` function
 which is normally used to compile code. This type of test often depends
 on assertions or verifier errors, but it is also possible to use
 filecheck directives which will be matched against the final form of the
-Cretonne IL right before binary machine code emission.
+Cretonne IR right before binary machine code emission.
--- a/cranelift/filetests/dce/basic.cton
+++ b/cranelift/filetests/dce/basic.cton
@@ -0,0 +1,46 @@
+test dce
+
+function %simple() -> i32 {
+ebb0:
+    v2 = iconst.i32 2
+    v3 = iconst.i32 3
+    return v3
+}
+; sameln: function %simple
+; nextln: ebb0:
+; nextln:     v3 = iconst.i32 3
+; nextln:     return v3
+; nextln: }
+
+function %some_branching(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v3 = iconst.i32 70
+    v4 = iconst.i32 71
+    v5 = iconst.i32 72
+    v8 = iconst.i32 73
+    brz v0, ebb1
+    jump ebb2(v8)
+
+ebb1:
+    v2 = iadd v0, v3
+    return v0
+
+ebb2(v9: i32):
+    v6 = iadd v1, v4
+    v7 = iadd v6, v9
+    return v7
+}
+; sameln: function %some_branching
+; nextln: ebb0(v0: i32, v1: i32):
+; nextln:     v4 = iconst.i32 71
+; nextln:     v8 = iconst.i32 73
+; nextln:     brz v0, ebb1
+; nextln:     jump ebb2(v8)
+; nextln: 
+; nextln: ebb1:
+; nextln:     return v0
+; nextln: 
+; nextln: ebb2(v9: i32):
+; nextln:     v6 = iadd.i32 v1, v4
+; nextln:     v7 = iadd v6, v9
+; nextln:     return v7
--- a/cranelift/filetests/domtree/loops.cton
+++ b/cranelift/filetests/domtree/loops.cton
@@ -59,7 +59,7 @@ function %test(i32) {
 ; nextln: ebb5:
 ; nextln: }

-function %loop2(i32) native {
+function %loop2(i32) system_v {
    ebb0(v0: i32):
        brz v0, ebb1    ; dominates: ebb1 ebb3 ebb4 ebb5
        jump ebb2       ; dominates: ebb2
--- a/cranelift/filetests/domtree/loops2.cton
+++ b/cranelift/filetests/domtree/loops2.cton
@@ -43,7 +43,7 @@ function %loop1(i32) {
 ; nextln: ebb9:
 ; nextln: }

-function %loop2(i32) native {
+function %loop2(i32) system_v {
    ebb0(v0: i32):
        brz v0, ebb1    ; dominates: ebb1 ebb3 ebb4 ebb5
        jump ebb2       ; dominates: ebb2
--- a/cranelift/filetests/isa/intel/abi-bool.cton
+++ b/cranelift/filetests/isa/intel/abi-bool.cton
@@ -2,7 +2,7 @@ test compile
 set is_64bit=1
 isa intel haswell

-function %foo(i64, i64, i64, i32) -> b1 native {
+function %foo(i64, i64, i64, i32) -> b1 system_v {
 ebb3(v0: i64, v1: i64, v2: i64, v3: i32):
    v5 = icmp ne v2, v2
    v8 = iconst.i64 0
--- a/cranelift/filetests/isa/intel/abi32.cton
+++ b/cranelift/filetests/isa/intel/abi32.cton
@@ -5,14 +5,14 @@ isa intel
 ; regex: V=v\d+

 function %f() {
-    sig0 = (i32) -> i32 native
-    ; check: sig0 = (i32 [0]) -> i32 [%rax] native
+    sig0 = (i32) -> i32 system_v
+    ; check: sig0 = (i32 [0]) -> i32 [%rax] system_v

-    sig1 = (i64) -> b1 native
-    ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] native
+    sig1 = (i64) -> b1 system_v
+    ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] system_v

-    sig2 = (f32, i64) -> f64 native
-    ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] native
+    sig2 = (f32, i64) -> f64 system_v
+    ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] system_v

 ebb0:
    return
--- a/cranelift/filetests/isa/intel/abi64.cton
+++ b/cranelift/filetests/isa/intel/abi64.cton
@@ -6,14 +6,14 @@ isa intel
 ; regex: V=v\d+

 function %f() {
-    sig0 = (i32) -> i32 native
-    ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] native
+    sig0 = (i32) -> i32 system_v
+    ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] system_v

-    sig1 = (i64) -> b1 native
-    ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] native
+    sig1 = (i64) -> b1 system_v
+    ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] system_v

-    sig2 = (f32, i64) -> f64 native
-    ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] native
+    sig2 = (f32, i64) -> f64 system_v
+    ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v

 ebb0:
    return
--- a/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton
+++ b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton
@@ -47,28 +47,23 @@ ebb1(v20: i32):
 function %i64_popcount(i64) -> i64 {
 ebb0(v30: i64):
  v31 = popcnt v30;
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: iconst.i64
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: iadd
  ; check: iconst.i64
  ; check: band
  ; check: iconst.i64
  ; check: imul
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  return v31;
 }

@@ -78,27 +73,22 @@ ebb0(v30: i64):
 function %i32_popcount(i32) -> i32 {
 ebb0(v40: i32):
  v41 = popcnt v40;
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: iconst.i32
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: band
  ; check: isub
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  ; check: iadd
  ; check: iconst.i32
  ; check: band
  ; check: iconst.i32
  ; check: imul
-  ; check: iconst.i32
-  ; check: ushr
+  ; check: ushr_imm
  return v41;
 }
--- a/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton
+++ b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount_encoding.cton
@@ -15,56 +15,56 @@ ebb0:

    [-,%r11]                 v10 = iconst.i64 0x1234
    ; asm: bsfq %r11, %rcx
-    [-,%rcx,%eflags]         v11, v12 = x86_bsf v10    ; bin: 49 0f bc cb
+    [-,%rcx,%rflags]         v11, v12 = x86_bsf v10    ; bin: 49 0f bc cb

    [-,%rdx]                 v14 = iconst.i64 0x5678
    ; asm: bsfq %rdx, %r12
-    [-,%r12,%eflags]         v15, v16 = x86_bsf v14    ; bin: 4c 0f bc e2
+    [-,%r12,%rflags]         v15, v16 = x86_bsf v14    ; bin: 4c 0f bc e2

    ; asm: bsfq %rdx, %rdi
-    [-,%rdi,%eflags]         v17, v18 = x86_bsf v14    ; bin: 48 0f bc fa
+    [-,%rdi,%rflags]         v17, v18 = x86_bsf v14    ; bin: 48 0f bc fa


    ; 32-bit wide bsf

    [-,%r11]                 v20 = iconst.i32 0x1234
    ; asm: bsfl %r11d, %ecx
-    [-,%rcx,%eflags]         v21, v22 = x86_bsf v20    ; bin: 41 0f bc cb
+    [-,%rcx,%rflags]         v21, v22 = x86_bsf v20    ; bin: 41 0f bc cb

    [-,%rdx]                 v24 = iconst.i32 0x5678
    ; asm: bsfl %edx, %r12d
-    [-,%r12,%eflags]         v25, v26 = x86_bsf v24    ; bin: 44 0f bc e2
+    [-,%r12,%rflags]         v25, v26 = x86_bsf v24    ; bin: 44 0f bc e2

    ; asm: bsfl %edx, %esi
-    [-,%rsi,%eflags]         v27, v28 = x86_bsf v24    ; bin: 0f bc f2
+    [-,%rsi,%rflags]         v27, v28 = x86_bsf v24    ; bin: 0f bc f2


    ; 64-bit wide bsr

    [-,%r11]                 v30 = iconst.i64 0x1234
    ; asm: bsrq %r11, %rcx
-    [-,%rcx,%eflags]         v31, v32 = x86_bsr v30    ; bin: 49 0f bd cb
+    [-,%rcx,%rflags]         v31, v32 = x86_bsr v30    ; bin: 49 0f bd cb

    [-,%rdx]                 v34 = iconst.i64 0x5678
    ; asm: bsrq %rdx, %r12
-    [-,%r12,%eflags]         v35, v36 = x86_bsr v34    ; bin: 4c 0f bd e2
+    [-,%r12,%rflags]         v35, v36 = x86_bsr v34    ; bin: 4c 0f bd e2

    ; asm: bsrq %rdx, %rdi
-    [-,%rdi,%eflags]         v37, v38 = x86_bsr v34    ; bin: 48 0f bd fa
+    [-,%rdi,%rflags]         v37, v38 = x86_bsr v34    ; bin: 48 0f bd fa


    ; 32-bit wide bsr

    [-,%r11]                 v40 = iconst.i32 0x1234
    ; asm: bsrl %r11d, %ecx
-    [-,%rcx,%eflags]         v41, v42 = x86_bsr v40    ; bin: 41 0f bd cb
+    [-,%rcx,%rflags]         v41, v42 = x86_bsr v40    ; bin: 41 0f bd cb

    [-,%rdx]                 v44 = iconst.i32 0x5678
    ; asm: bsrl %edx, %r12d
-    [-,%r12,%eflags]         v45, v46 = x86_bsr v44    ; bin: 44 0f bd e2
+    [-,%r12,%rflags]         v45, v46 = x86_bsr v44    ; bin: 44 0f bd e2

    ; asm: bsrl %edx, %esi
-    [-,%rsi,%eflags]         v47, v48 = x86_bsr v44    ; bin: 0f bd f2
+    [-,%rsi,%rflags]         v47, v48 = x86_bsr v44    ; bin: 0f bd f2


    ; 64-bit wide cmov
--- a/cranelift/filetests/isa/intel/binary32-float.cton
+++ b/cranelift/filetests/isa/intel/binary32-float.cton
@@ -147,48 +147,48 @@ ebb0:

    ; Load/Store

-    ; asm: movd (%ecx), %xmm5
-    [-,%xmm5]           v100 = load.f32 v0                      ; bin: 66 0f 6e 29
-    ; asm: movd (%esi), %xmm2
-    [-,%xmm2]           v101 = load.f32 v1                      ; bin: 66 0f 6e 16
-    ; asm: movd 50(%ecx), %xmm5
-    [-,%xmm5]           v110 = load.f32 v0+50                   ; bin: 66 0f 6e 69 32
-    ; asm: movd -50(%esi), %xmm2
-    [-,%xmm2]           v111 = load.f32 v1-50                   ; bin: 66 0f 6e 56 ce
-    ; asm: movd 10000(%ecx), %xmm5
-    [-,%xmm5]           v120 = load.f32 v0+10000                ; bin: 66 0f 6e a9 00002710
-    ; asm: movd -10000(%esi), %xmm2
-    [-,%xmm2]           v121 = load.f32 v1-10000                ; bin: 66 0f 6e 96 ffffd8f0
+    ; asm: movss (%ecx), %xmm5
+    [-,%xmm5]           v100 = load.f32 v0                      ; bin: heap_oob f3 0f 10 29
+    ; asm: movss (%esi), %xmm2
+    [-,%xmm2]           v101 = load.f32 v1                      ; bin: heap_oob f3 0f 10 16
+    ; asm: movss 50(%ecx), %xmm5
+    [-,%xmm5]           v110 = load.f32 v0+50                   ; bin: heap_oob f3 0f 10 69 32
+    ; asm: movss -50(%esi), %xmm2
+    [-,%xmm2]           v111 = load.f32 v1-50                   ; bin: heap_oob f3 0f 10 56 ce
+    ; asm: movss 10000(%ecx), %xmm5
+    [-,%xmm5]           v120 = load.f32 v0+10000                ; bin: heap_oob f3 0f 10 a9 00002710
+    ; asm: movss -10000(%esi), %xmm2
+    [-,%xmm2]           v121 = load.f32 v1-10000                ; bin: heap_oob f3 0f 10 96 ffffd8f0

-    ; asm: movd %xmm5, (%ecx)
-    [-]                 store.f32 v100, v0                      ; bin: 66 0f 7e 29
-    ; asm: movd %xmm2, (%esi)
-    [-]                 store.f32 v101, v1                      ; bin: 66 0f 7e 16
-    ; asm: movd %xmm5, 50(%ecx)
-    [-]                 store.f32 v100, v0+50                   ; bin: 66 0f 7e 69 32
-    ; asm: movd %xmm2, -50(%esi)
-    [-]                 store.f32 v101, v1-50                   ; bin: 66 0f 7e 56 ce
-    ; asm: movd %xmm5, 10000(%ecx)
-    [-]                 store.f32 v100, v0+10000                ; bin: 66 0f 7e a9 00002710
-    ; asm: movd %xmm2, -10000(%esi)
-    [-]                 store.f32 v101, v1-10000                ; bin: 66 0f 7e 96 ffffd8f0
+    ; asm: movss %xmm5, (%ecx)
+    [-]                 store.f32 v100, v0                      ; bin: heap_oob f3 0f 11 29
+    ; asm: movss %xmm2, (%esi)
+    [-]                 store.f32 v101, v1                      ; bin: heap_oob f3 0f 11 16
+    ; asm: movss %xmm5, 50(%ecx)
+    [-]                 store.f32 v100, v0+50                   ; bin: heap_oob f3 0f 11 69 32
+    ; asm: movss %xmm2, -50(%esi)
+    [-]                 store.f32 v101, v1-50                   ; bin: heap_oob f3 0f 11 56 ce
+    ; asm: movss %xmm5, 10000(%ecx)
+    [-]                 store.f32 v100, v0+10000                ; bin: heap_oob f3 0f 11 a9 00002710
+    ; asm: movss %xmm2, -10000(%esi)
+    [-]                 store.f32 v101, v1-10000                ; bin: heap_oob f3 0f 11 96 ffffd8f0

    ; Spill / Fill.

-    ; asm: movd %xmm5, 1032(%esp)
-    [-,ss1]             v200 = spill v100                       ; bin: 66 0f 7e ac 24 00000408
-    ; asm: movd %xmm2, 1032(%esp)
-    [-,ss1]             v201 = spill v101                       ; bin: 66 0f 7e 94 24 00000408
+    ; asm: movss %xmm5, 1032(%esp)
+    [-,ss1]             v200 = spill v100                       ; bin: f3 0f 11 ac 24 00000408
+    ; asm: movss %xmm2, 1032(%esp)
+    [-,ss1]             v201 = spill v101                       ; bin: f3 0f 11 94 24 00000408

-    ; asm: movd 1032(%esp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: 66 0f 6e ac 24 00000408
-    ; asm: movd 1032(%esp), %xmm2
-    [-,%xmm2]           v211 = fill v201                        ; bin: 66 0f 6e 94 24 00000408
+    ; asm: movss 1032(%esp), %xmm5
+    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 10 ac 24 00000408
+    ; asm: movss 1032(%esp), %xmm2
+    [-,%xmm2]           v211 = fill v201                        ; bin: f3 0f 10 94 24 00000408

-    ; asm: movd %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: 66 0f 7e ac 24 00000408
-    ; asm: movd 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: 66 0f 6e ac 24 00000408
+    ; asm: movss %xmm5, 1032(%rsp)
+    regspill v100, %xmm5 -> ss1                                 ; bin: f3 0f 11 ac 24 00000408
+    ; asm: movss 1032(%rsp), %xmm5
+    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 10 ac 24 00000408

    ; Comparisons.
    ;
@@ -221,11 +221,11 @@ ebb0:
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 0f 2e d5 0f 96 c2

    ; asm: ucomiss %xmm2, %xmm5
-    [-,%eflags]         v310 = ffcmp v10, v11                   ; bin: 0f 2e ea
+    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 0f 2e ea
    ; asm: ucomiss %xmm2, %xmm5
-    [-,%eflags]         v311 = ffcmp v11, v10                   ; bin: 0f 2e d5
+    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 0f 2e d5
    ; asm: ucomiss %xmm5, %xmm5
-    [-,%eflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed
+    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed

    return
 }
@@ -362,48 +362,48 @@ ebb0:

    ; Load/Store

-    ; asm: movq (%ecx), %xmm5
-    [-,%xmm5]           v100 = load.f64 v0                      ; bin: f3 0f 7e 29
-    ; asm: movq (%esi), %xmm2
-    [-,%xmm2]           v101 = load.f64 v1                      ; bin: f3 0f 7e 16
-    ; asm: movq 50(%ecx), %xmm5
-    [-,%xmm5]           v110 = load.f64 v0+50                   ; bin: f3 0f 7e 69 32
-    ; asm: movq -50(%esi), %xmm2
-    [-,%xmm2]           v111 = load.f64 v1-50                   ; bin: f3 0f 7e 56 ce
-    ; asm: movq 10000(%ecx), %xmm5
-    [-,%xmm5]           v120 = load.f64 v0+10000                ; bin: f3 0f 7e a9 00002710
-    ; asm: movq -10000(%esi), %xmm2
-    [-,%xmm2]           v121 = load.f64 v1-10000                ; bin: f3 0f 7e 96 ffffd8f0
+    ; asm: movsd (%ecx), %xmm5
+    [-,%xmm5]           v100 = load.f64 v0                      ; bin: heap_oob f2 0f 10 29
+    ; asm: movsd (%esi), %xmm2
+    [-,%xmm2]           v101 = load.f64 v1                      ; bin: heap_oob f2 0f 10 16
+    ; asm: movsd 50(%ecx), %xmm5
+    [-,%xmm5]           v110 = load.f64 v0+50                   ; bin: heap_oob f2 0f 10 69 32
+    ; asm: movsd -50(%esi), %xmm2
+    [-,%xmm2]           v111 = load.f64 v1-50                   ; bin: heap_oob f2 0f 10 56 ce
+    ; asm: movsd 10000(%ecx), %xmm5
+    [-,%xmm5]           v120 = load.f64 v0+10000                ; bin: heap_oob f2 0f 10 a9 00002710
+    ; asm: movsd -10000(%esi), %xmm2
+    [-,%xmm2]           v121 = load.f64 v1-10000                ; bin: heap_oob f2 0f 10 96 ffffd8f0

-    ; asm: movq %xmm5, (%ecx)
-    [-]                 store.f64 v100, v0                      ; bin: 66 0f d6 29
-    ; asm: movq %xmm2, (%esi)
-    [-]                 store.f64 v101, v1                      ; bin: 66 0f d6 16
-    ; asm: movq %xmm5, 50(%ecx)
-    [-]                 store.f64 v100, v0+50                   ; bin: 66 0f d6 69 32
-    ; asm: movq %xmm2, -50(%esi)
-    [-]                 store.f64 v101, v1-50                   ; bin: 66 0f d6 56 ce
-    ; asm: movq %xmm5, 10000(%ecx)
-    [-]                 store.f64 v100, v0+10000                ; bin: 66 0f d6 a9 00002710
-    ; asm: movq %xmm2, -10000(%esi)
-    [-]                 store.f64 v101, v1-10000                ; bin: 66 0f d6 96 ffffd8f0
+    ; asm: movsd %xmm5, (%ecx)
+    [-]                 store.f64 v100, v0                      ; bin: heap_oob f2 0f 11 29
+    ; asm: movsd %xmm2, (%esi)
+    [-]                 store.f64 v101, v1                      ; bin: heap_oob f2 0f 11 16
+    ; asm: movsd %xmm5, 50(%ecx)
+    [-]                 store.f64 v100, v0+50                   ; bin: heap_oob f2 0f 11 69 32
+    ; asm: movsd %xmm2, -50(%esi)
+    [-]                 store.f64 v101, v1-50                   ; bin: heap_oob f2 0f 11 56 ce
+    ; asm: movsd %xmm5, 10000(%ecx)
+    [-]                 store.f64 v100, v0+10000                ; bin: heap_oob f2 0f 11 a9 00002710
+    ; asm: movsd %xmm2, -10000(%esi)
+    [-]                 store.f64 v101, v1-10000                ; bin: heap_oob f2 0f 11 96 ffffd8f0

    ; Spill / Fill.

-    ; asm: movq %xmm5, 1032(%esp)
-    [-,ss1]             v200 = spill v100                       ; bin: 66 0f d6 ac 24 00000408
-    ; asm: movq %xmm2, 1032(%esp)
-    [-,ss1]             v201 = spill v101                       ; bin: 66 0f d6 94 24 00000408
+    ; asm: movsd %xmm5, 1032(%esp)
+    [-,ss1]             v200 = spill v100                       ; bin: f2 0f 11 ac 24 00000408
+    ; asm: movsd %xmm2, 1032(%esp)
+    [-,ss1]             v201 = spill v101                       ; bin: f2 0f 11 94 24 00000408

-    ; asm: movq 1032(%esp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 7e ac 24 00000408
-    ; asm: movq 1032(%esp), %xmm2
-    [-,%xmm2]           v211 = fill v201                        ; bin: f3 0f 7e 94 24 00000408
+    ; asm: movsd 1032(%esp), %xmm5
+    [-,%xmm5]           v210 = fill v200                        ; bin: f2 0f 10 ac 24 00000408
+    ; asm: movsd 1032(%esp), %xmm2
+    [-,%xmm2]           v211 = fill v201                        ; bin: f2 0f 10 94 24 00000408

-    ; asm: movq %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: 66 0f d6 ac 24 00000408
-    ; asm: movq 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 7e ac 24 00000408
+    ; asm: movsd %xmm5, 1032(%rsp)
+    regspill v100, %xmm5 -> ss1                                 ; bin: f2 0f 11 ac 24 00000408
+    ; asm: movsd 1032(%rsp), %xmm5
+    regfill v100, ss1 -> %xmm5                                  ; bin: f2 0f 10 ac 24 00000408

    ; Comparisons.
    ;
@@ -436,11 +436,11 @@ ebb0:
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 66 0f 2e d5 0f 96 c2

    ; asm: ucomisd %xmm2, %xmm5
-    [-,%eflags]         v310 = ffcmp v10, v11                   ; bin: 66 0f 2e ea
+    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 66 0f 2e ea
    ; asm: ucomisd %xmm2, %xmm5
-    [-,%eflags]         v311 = ffcmp v11, v10                   ; bin: 66 0f 2e d5
+    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 66 0f 2e d5
    ; asm: ucomisd %xmm5, %xmm5
-    [-,%eflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed
+    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed

    return
 }
@@ -448,7 +448,7 @@ ebb0:
 function %cpuflags_float(f32 [%xmm0]) {
 ebb0(v0: f32 [%xmm0]):
    ; asm: ucomiss %xmm0, %xmm0
-    [-,%eflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0
+    [-,%rflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0

    jump ebb1

@@ -471,21 +471,21 @@ ebb1:
    brff ule v1, ebb1                                           ; bin: 76 f0

    ; asm: jp .+4; ud2
-    trapff ord v1, user0                                        ; bin: 7a 02 0f 0b
+    trapff ord v1, user0                                        ; bin: 7a 02 user0 0f 0b
    ; asm: jnp .+4; ud2
-    trapff uno v1, user0                                        ; bin: 7b 02 0f 0b
+    trapff uno v1, user0                                        ; bin: 7b 02 user0 0f 0b
    ; asm: je .+4; ud2
-    trapff one v1, user0                                        ; bin: 74 02 0f 0b
+    trapff one v1, user0                                        ; bin: 74 02 user0 0f 0b
    ; asm: jne .+4; ud2
-    trapff ueq v1, user0                                        ; bin: 75 02 0f 0b
+    trapff ueq v1, user0                                        ; bin: 75 02 user0 0f 0b
    ; asm: jna .+4; ud2
-    trapff gt v1, user0                                         ; bin: 76 02 0f 0b
+    trapff gt v1, user0                                         ; bin: 76 02 user0 0f 0b
    ; asm: jnae .+4; ud2
-    trapff ge v1, user0                                         ; bin: 72 02 0f 0b
+    trapff ge v1, user0                                         ; bin: 72 02 user0 0f 0b
    ; asm: jnb .+4; ud2
-    trapff ult v1, user0                                        ; bin: 73 02 0f 0b
+    trapff ult v1, user0                                        ; bin: 73 02 user0 0f 0b
    ; asm: jnbe .+4; ud2
-    trapff ule v1, user0                                        ; bin: 77 02 0f 0b
+    trapff ule v1, user0                                        ; bin: 77 02 user0 0f 0b

    ; asm: setnp %bl
    [-,%rbx]            v10 = trueff ord v1                     ; bin: 0f 9b c3
--- a/cranelift/filetests/isa/intel/binary32.cton
+++ b/cranelift/filetests/isa/intel/binary32.cton
@@ -1,4 +1,4 @@
-; binary emission of 32-bit code.
+; binary emission of x86-32 code.
 test binemit
 set is_compressed
 isa intel haswell
@@ -25,6 +25,9 @@ ebb0:
    ; asm: movl $2, %esi
    [-,%rsi]            v2 = iconst.i32 2        ; bin: be 00000002

+    ; asm: movl $1, %ecx
+    [-,%rcx]            v9007 = bconst.b1 true      ; bin: b9 00000001
+
    ; Integer Register-Register Operations.

    ; asm: addl %esi, %ecx
@@ -125,13 +128,13 @@ ebb0:
    ; asm: movl $2, %edx
    [-,%rdx]      v53 = iconst.i32 2                    ; bin: ba 00000002
    ; asm: idivl %ecx
-    [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1  ; bin: f7 f9
+    [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1  ; bin: int_divz f7 f9
    ; asm: idivl %esi
-    [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2  ; bin: f7 fe
+    [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2  ; bin: int_divz f7 fe
    ; asm: divl %ecx
-    [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1  ; bin: f7 f1
+    [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1  ; bin: int_divz f7 f1
    ; asm: divl %esi
-    [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2  ; bin: f7 f6
+    [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2  ; bin: int_divz f7 f6

    ; Register copies.

@@ -152,105 +155,105 @@ ebb0:
    ; Register indirect addressing with no displacement.

    ; asm: movl %ecx, (%esi)
-    store v1, v2                                ; bin: 89 0e
+    store v1, v2                                ; bin: heap_oob 89 0e
    ; asm: movl %esi, (%ecx)
-    store v2, v1                                ; bin: 89 31
+    store v2, v1                                ; bin: heap_oob 89 31
    ; asm: movw %cx, (%esi)
-    istore16 v1, v2                             ; bin: 66 89 0e
+    istore16 v1, v2                             ; bin: heap_oob 66 89 0e
    ; asm: movw %si, (%ecx)
-    istore16 v2, v1                             ; bin: 66 89 31
+    istore16 v2, v1                             ; bin: heap_oob 66 89 31
    ; asm: movb %cl, (%esi)
-    istore8 v1, v2                              ; bin: 88 0e
+    istore8 v1, v2                              ; bin: heap_oob 88 0e
    ; Can't store %sil in 32-bit mode (needs REX prefix).

    ; asm: movl (%ecx), %edi
-    [-,%rdi]            v100 = load.i32 v1      ; bin: 8b 39
+    [-,%rdi]            v100 = load.i32 v1      ; bin: heap_oob 8b 39
    ; asm: movl (%esi), %edx
-    [-,%rdx]            v101 = load.i32 v2      ; bin: 8b 16
+    [-,%rdx]            v101 = load.i32 v2      ; bin: heap_oob 8b 16
    ; asm: movzwl (%ecx), %edi
-    [-,%rdi]            v102 = uload16.i32 v1   ; bin: 0f b7 39
+    [-,%rdi]            v102 = uload16.i32 v1   ; bin: heap_oob 0f b7 39
    ; asm: movzwl (%esi), %edx
-    [-,%rdx]            v103 = uload16.i32 v2   ; bin: 0f b7 16
+    [-,%rdx]            v103 = uload16.i32 v2   ; bin: heap_oob 0f b7 16
    ; asm: movswl (%ecx), %edi
-    [-,%rdi]            v104 = sload16.i32 v1   ; bin: 0f bf 39
+    [-,%rdi]            v104 = sload16.i32 v1   ; bin: heap_oob 0f bf 39
    ; asm: movswl (%esi), %edx
-    [-,%rdx]            v105 = sload16.i32 v2   ; bin: 0f bf 16
+    [-,%rdx]            v105 = sload16.i32 v2   ; bin: heap_oob 0f bf 16
    ; asm: movzbl (%ecx), %edi
-    [-,%rdi]            v106 = uload8.i32 v1    ; bin: 0f b6 39
+    [-,%rdi]            v106 = uload8.i32 v1    ; bin: heap_oob 0f b6 39
    ; asm: movzbl (%esi), %edx
-    [-,%rdx]            v107 = uload8.i32 v2    ; bin: 0f b6 16
+    [-,%rdx]            v107 = uload8.i32 v2    ; bin: heap_oob 0f b6 16
    ; asm: movsbl (%ecx), %edi
-    [-,%rdi]            v108 = sload8.i32 v1    ; bin: 0f be 39
+    [-,%rdi]            v108 = sload8.i32 v1    ; bin: heap_oob 0f be 39
    ; asm: movsbl (%esi), %edx
-    [-,%rdx]            v109 = sload8.i32 v2    ; bin: 0f be 16
+    [-,%rdx]            v109 = sload8.i32 v2    ; bin: heap_oob 0f be 16

    ; Register-indirect with 8-bit signed displacement.

    ; asm: movl %ecx, 100(%esi)
-    store v1, v2+100                            ; bin: 89 4e 64
+    store v1, v2+100                            ; bin: heap_oob 89 4e 64
    ; asm: movl %esi, -100(%ecx)
-    store v2, v1-100                            ; bin: 89 71 9c
+    store v2, v1-100                            ; bin: heap_oob 89 71 9c
    ; asm: movw %cx, 100(%esi)
-    istore16 v1, v2+100                         ; bin: 66 89 4e 64
+    istore16 v1, v2+100                         ; bin: heap_oob 66 89 4e 64
    ; asm: movw %si, -100(%ecx)
-    istore16 v2, v1-100                         ; bin: 66 89 71 9c
+    istore16 v2, v1-100                         ; bin: heap_oob 66 89 71 9c
    ; asm: movb %cl, 100(%esi)
-    istore8 v1, v2+100                          ; bin: 88 4e 64
+    istore8 v1, v2+100                          ; bin: heap_oob 88 4e 64

    ; asm: movl 50(%ecx), %edi
-    [-,%rdi]            v110 = load.i32 v1+50           ; bin: 8b 79 32
+    [-,%rdi]            v110 = load.i32 v1+50           ; bin: heap_oob 8b 79 32
    ; asm: movl -50(%esi), %edx
-    [-,%rdx]            v111 = load.i32 v2-50           ; bin: 8b 56 ce
+    [-,%rdx]            v111 = load.i32 v2-50           ; bin: heap_oob 8b 56 ce
    ; asm: movzwl 50(%ecx), %edi
-    [-,%rdi]            v112 = uload16.i32 v1+50        ; bin: 0f b7 79 32
+    [-,%rdi]            v112 = uload16.i32 v1+50        ; bin: heap_oob 0f b7 79 32
    ; asm: movzwl -50(%esi), %edx
-    [-,%rdx]            v113 = uload16.i32 v2-50        ; bin: 0f b7 56 ce
+    [-,%rdx]            v113 = uload16.i32 v2-50        ; bin: heap_oob 0f b7 56 ce
    ; asm: movswl 50(%ecx), %edi
-    [-,%rdi]            v114 = sload16.i32 v1+50        ; bin: 0f bf 79 32
+    [-,%rdi]            v114 = sload16.i32 v1+50        ; bin: heap_oob 0f bf 79 32
    ; asm: movswl -50(%esi), %edx
-    [-,%rdx]            v115 = sload16.i32 v2-50        ; bin: 0f bf 56 ce
+    [-,%rdx]            v115 = sload16.i32 v2-50        ; bin: heap_oob 0f bf 56 ce
    ; asm: movzbl 50(%ecx), %edi
-    [-,%rdi]            v116 = uload8.i32 v1+50         ; bin: 0f b6 79 32
+    [-,%rdi]            v116 = uload8.i32 v1+50         ; bin: heap_oob 0f b6 79 32
    ; asm: movzbl -50(%esi), %edx
-    [-,%rdx]            v117 = uload8.i32 v2-50         ; bin: 0f b6 56 ce
+    [-,%rdx]            v117 = uload8.i32 v2-50         ; bin: heap_oob 0f b6 56 ce
    ; asm: movsbl 50(%ecx), %edi
-    [-,%rdi]            v118 = sload8.i32 v1+50         ; bin: 0f be 79 32
+    [-,%rdi]            v118 = sload8.i32 v1+50         ; bin: heap_oob 0f be 79 32
    ; asm: movsbl -50(%esi), %edx
-    [-,%rdx]            v119 = sload8.i32 v2-50         ; bin: 0f be 56 ce
+    [-,%rdx]            v119 = sload8.i32 v2-50         ; bin: heap_oob 0f be 56 ce

    ; Register-indirect with 32-bit signed displacement.

    ; asm: movl %ecx, 10000(%esi)
-    store v1, v2+10000                          ; bin: 89 8e 00002710
+    store v1, v2+10000                          ; bin: heap_oob 89 8e 00002710
    ; asm: movl %esi, -10000(%ecx)
-    store v2, v1-10000                          ; bin: 89 b1 ffffd8f0
+    store v2, v1-10000                          ; bin: heap_oob 89 b1 ffffd8f0
    ; asm: movw %cx, 10000(%esi)
-    istore16 v1, v2+10000                       ; bin: 66 89 8e 00002710
+    istore16 v1, v2+10000                       ; bin: heap_oob 66 89 8e 00002710
    ; asm: movw %si, -10000(%ecx)
-    istore16 v2, v1-10000                       ; bin: 66 89 b1 ffffd8f0
+    istore16 v2, v1-10000                       ; bin: heap_oob 66 89 b1 ffffd8f0
    ; asm: movb %cl, 10000(%esi)
-    istore8 v1, v2+10000                        ; bin: 88 8e 00002710
+    istore8 v1, v2+10000                        ; bin: heap_oob 88 8e 00002710

    ; asm: movl 50000(%ecx), %edi
-    [-,%rdi]            v120 = load.i32 v1+50000           ; bin: 8b b9 0000c350
+    [-,%rdi]            v120 = load.i32 v1+50000           ; bin: heap_oob 8b b9 0000c350
    ; asm: movl -50000(%esi), %edx
-    [-,%rdx]            v121 = load.i32 v2-50000           ; bin: 8b 96 ffff3cb0
+    [-,%rdx]            v121 = load.i32 v2-50000           ; bin: heap_oob 8b 96 ffff3cb0
    ; asm: movzwl 50000(%ecx), %edi
-    [-,%rdi]            v122 = uload16.i32 v1+50000        ; bin: 0f b7 b9 0000c350
+    [-,%rdi]            v122 = uload16.i32 v1+50000        ; bin: heap_oob 0f b7 b9 0000c350
    ; asm: movzwl -50000(%esi), %edx
-    [-,%rdx]            v123 = uload16.i32 v2-50000        ; bin: 0f b7 96 ffff3cb0
+    [-,%rdx]            v123 = uload16.i32 v2-50000        ; bin: heap_oob 0f b7 96 ffff3cb0
    ; asm: movswl 50000(%ecx), %edi
-    [-,%rdi]            v124 = sload16.i32 v1+50000        ; bin: 0f bf b9 0000c350
+    [-,%rdi]            v124 = sload16.i32 v1+50000        ; bin: heap_oob 0f bf b9 0000c350
    ; asm: movswl -50000(%esi), %edx
-    [-,%rdx]            v125 = sload16.i32 v2-50000        ; bin: 0f bf 96 ffff3cb0
+    [-,%rdx]            v125 = sload16.i32 v2-50000        ; bin: heap_oob 0f bf 96 ffff3cb0
    ; asm: movzbl 50000(%ecx), %edi
-    [-,%rdi]            v126 = uload8.i32 v1+50000         ; bin: 0f b6 b9 0000c350
+    [-,%rdi]            v126 = uload8.i32 v1+50000         ; bin: heap_oob 0f b6 b9 0000c350
    ; asm: movzbl -50000(%esi), %edx
-    [-,%rdx]            v127 = uload8.i32 v2-50000         ; bin: 0f b6 96 ffff3cb0
+    [-,%rdx]            v127 = uload8.i32 v2-50000         ; bin: heap_oob 0f b6 96 ffff3cb0
    ; asm: movsbl 50000(%ecx), %edi
-    [-,%rdi]            v128 = sload8.i32 v1+50000         ; bin: 0f be b9 0000c350
+    [-,%rdi]            v128 = sload8.i32 v1+50000         ; bin: heap_oob 0f be b9 0000c350
    ; asm: movsbl -50000(%esi), %edx
-    [-,%rdx]            v129 = sload8.i32 v2-50000         ; bin: 0f be 96 ffff3cb0
+    [-,%rdx]            v129 = sload8.i32 v2-50000         ; bin: heap_oob 0f be 96 ffff3cb0

    ; Bit-counting instructions.

@@ -403,6 +406,13 @@ ebb0:
    ; asm: addl $-2147483648, %esp
    adjust_sp_imm -2147483648                   ; bin: 81 c4 80000000

+    ; Shift immediates
+    ; asm: shll $2, %esi
+    [-,%rsi]             v513 = ishl_imm v2, 2    ; bin: c1 e6 02
+    ; asm: sarl $5, %esi
+    [-,%rsi]             v514 = sshr_imm v2, 5    ; bin: c1 fe 05
+    ; asm: shrl $8, %esi
+    [-,%rsi]             v515 = ushr_imm v2, 8    ; bin: c1 ee 08

    ; asm: testl %ecx, %ecx
    ; asm: je ebb1
@@ -427,7 +437,7 @@ ebb1:

    ; asm: ebb2:
 ebb2:
-    trap user0                                  ; bin: 0f 0b
+    trap user0                                  ; bin: user0 0f 0b
 }

 ; Special branch encodings only for I32 mode.
@@ -466,9 +476,9 @@ ebb0:

 ebb1:
    ; asm: cmpl %esi, %ecx
-    [-,%eflags]         v10 = ifcmp v1, v2      ; bin: 39 f1
+    [-,%rflags]         v10 = ifcmp v1, v2      ; bin: 39 f1
    ; asm: cmpl %ecx, %esi
-    [-,%eflags]         v11 = ifcmp v2, v1      ; bin: 39 ce
+    [-,%rflags]         v11 = ifcmp v2, v1      ; bin: 39 ce

    ; asm: je ebb1
    brif eq v11, ebb1                           ; bin: 74 fa
@@ -514,41 +524,41 @@ ebb1:

    ; The trapif instructions are encoded as macros: a conditional jump over a ud2.
    ; asm: jne .+4; ud2
-    trapif eq v11, user0                           ; bin: 75 02 0f 0b
+    trapif eq v11, user0                           ; bin: 75 02 user0 0f 0b
    ; asm: je .+4; ud2
-    trapif ne v11, user0                           ; bin: 74 02 0f 0b
+    trapif ne v11, user0                           ; bin: 74 02 user0 0f 0b
    ; asm: jnl .+4; ud2
-    trapif slt v11, user0                          ; bin: 7d 02 0f 0b
+    trapif slt v11, user0                          ; bin: 7d 02 user0 0f 0b
    ; asm: jnge .+4; ud2
-    trapif sge v11, user0                          ; bin: 7c 02 0f 0b
+    trapif sge v11, user0                          ; bin: 7c 02 user0 0f 0b
    ; asm: jng .+4; ud2
-    trapif sgt v11, user0                          ; bin: 7e 02 0f 0b
+    trapif sgt v11, user0                          ; bin: 7e 02 user0 0f 0b
    ; asm: jnle .+4; ud2
-    trapif sle v11, user0                          ; bin: 7f 02 0f 0b
+    trapif sle v11, user0                          ; bin: 7f 02 user0 0f 0b
    ; asm: jnb .+4; ud2
-    trapif ult v11, user0                          ; bin: 73 02 0f 0b
+    trapif ult v11, user0                          ; bin: 73 02 user0 0f 0b
    ; asm: jnae .+4; ud2
-    trapif uge v11, user0                          ; bin: 72 02 0f 0b
+    trapif uge v11, user0                          ; bin: 72 02 user0 0f 0b
    ; asm: jna .+4; ud2
-    trapif ugt v11, user0                          ; bin: 76 02 0f 0b
+    trapif ugt v11, user0                          ; bin: 76 02 user0 0f 0b
    ; asm: jnbe .+4; ud2
-    trapif ule v11, user0                          ; bin: 77 02 0f 0b
+    trapif ule v11, user0                          ; bin: 77 02 user0 0f 0b

    ; Stack check.
    ; asm: cmpl %esp, %ecx
-    [-,%eflags]         v40 = ifcmp_sp v1       ; bin: 39 e1
+    [-,%rflags]         v40 = ifcmp_sp v1       ; bin: 39 e1
    ; asm: cmpl %esp, %esi
-    [-,%eflags]         v41 = ifcmp_sp v2       ; bin: 39 e6
+    [-,%rflags]         v41 = ifcmp_sp v2       ; bin: 39 e6

    ; asm: cmpl $-100, %ecx
-    [-,%eflags]         v42 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
+    [-,%rflags]         v42 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
    ; asm: cmpl $100, %esi
-    [-,%eflags]         v43 = ifcmp_imm v2, 100    ; bin: 83 fe 64
+    [-,%rflags]         v43 = ifcmp_imm v2, 100    ; bin: 83 fe 64

    ; asm: cmpl $-10000, %ecx
-    [-,%eflags]         v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
+    [-,%rflags]         v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
    ; asm: cmpl $10000, %esi
-    [-,%eflags]         v45 = ifcmp_imm v2, 10000  ; bin: 81 fe 00002710
+    [-,%rflags]         v45 = ifcmp_imm v2, 10000  ; bin: 81 fe 00002710

    return
 }
@@ -566,7 +576,7 @@ ebb0:
    ; asm: movzbl %cl, %esi
    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b6 f1

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i32/i16 conversion instructions.
@@ -582,5 +592,5 @@ ebb0:
    ; asm: movzwl %cx, %esi
    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b7 f1

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }
--- a/cranelift/filetests/isa/intel/binary64-float.cton
+++ b/cranelift/filetests/isa/intel/binary64-float.cton
@@ -157,52 +157,52 @@ ebb0:

    ; Load/Store

-    ; asm: movd (%r14), %xmm5
-    [-,%xmm5]           v100 = load.f32 v3                      ; bin: 66 41 0f 6e 2e
-    ; asm: movd (%rax), %xmm10
-    [-,%xmm10]          v101 = load.f32 v2                      ; bin: 66 44 0f 6e 10
-    ; asm: movd 50(%r14), %xmm5
-    [-,%xmm5]           v110 = load.f32 v3+50                   ; bin: 66 41 0f 6e 6e 32
-    ; asm: movd -50(%rax), %xmm10
-    [-,%xmm10]          v111 = load.f32 v2-50                   ; bin: 66 44 0f 6e 50 ce
-    ; asm: movd 10000(%r14), %xmm5
-    [-,%xmm5]           v120 = load.f32 v3+10000                ; bin: 66 41 0f 6e ae 00002710
-    ; asm: movd -10000(%rax), %xmm10
-    [-,%xmm10]          v121 = load.f32 v2-10000                ; bin: 66 44 0f 6e 90 ffffd8f0
+    ; asm: movss (%r14), %xmm5
+    [-,%xmm5]           v100 = load.f32 v3                      ; bin: heap_oob f3 41 0f 10 2e
+    ; asm: movss (%rax), %xmm10
+    [-,%xmm10]          v101 = load.f32 v2                      ; bin: heap_oob f3 44 0f 10 10
+    ; asm: movss 50(%r14), %xmm5
+    [-,%xmm5]           v110 = load.f32 v3+50                   ; bin: heap_oob f3 41 0f 10 6e 32
+    ; asm: movss -50(%rax), %xmm10
+    [-,%xmm10]          v111 = load.f32 v2-50                   ; bin: heap_oob f3 44 0f 10 50 ce
+    ; asm: movss 10000(%r14), %xmm5
+    [-,%xmm5]           v120 = load.f32 v3+10000                ; bin: heap_oob f3 41 0f 10 ae 00002710
+    ; asm: movss -10000(%rax), %xmm10
+    [-,%xmm10]          v121 = load.f32 v2-10000                ; bin: heap_oob f3 44 0f 10 90 ffffd8f0

-    ; asm: movd %xmm5, (%r14)
-    [-]                 store.f32 v100, v3                      ; bin: 66 41 0f 7e 2e
-    ; asm: movd %xmm10, (%rax)
-    [-]                 store.f32 v101, v2                      ; bin: 66 44 0f 7e 10
-    ; asm: movd %xmm5, (%r13)
-    [-]                 store.f32 v100, v4                      ; bin: 66 41 0f 7e 6d 00
-    ; asm: movd %xmm10, (%r13)
-    [-]                 store.f32 v101, v4                      ; bin: 66 45 0f 7e 55 00
-    ; asm: movd %xmm5, 50(%r14)
-    [-]                 store.f32 v100, v3+50                   ; bin: 66 41 0f 7e 6e 32
-    ; asm: movd %xmm10, -50(%rax)
-    [-]                 store.f32 v101, v2-50                   ; bin: 66 44 0f 7e 50 ce
-    ; asm: movd %xmm5, 10000(%r14)
-    [-]                 store.f32 v100, v3+10000                ; bin: 66 41 0f 7e ae 00002710
-    ; asm: movd %xmm10, -10000(%rax)
-    [-]                 store.f32 v101, v2-10000                ; bin: 66 44 0f 7e 90 ffffd8f0
+    ; asm: movss %xmm5, (%r14)
+    [-]                 store.f32 v100, v3                      ; bin: heap_oob f3 41 0f 11 2e
+    ; asm: movss %xmm10, (%rax)
+    [-]                 store.f32 v101, v2                      ; bin: heap_oob f3 44 0f 11 10
+    ; asm: movss %xmm5, (%r13)
+    [-]                 store.f32 v100, v4                      ; bin: heap_oob f3 41 0f 11 6d 00
+    ; asm: movss %xmm10, (%r13)
+    [-]                 store.f32 v101, v4                      ; bin: heap_oob f3 45 0f 11 55 00
+    ; asm: movss %xmm5, 50(%r14)
+    [-]                 store.f32 v100, v3+50                   ; bin: heap_oob f3 41 0f 11 6e 32
+    ; asm: movss %xmm10, -50(%rax)
+    [-]                 store.f32 v101, v2-50                   ; bin: heap_oob f3 44 0f 11 50 ce
+    ; asm: movss %xmm5, 10000(%r14)
+    [-]                 store.f32 v100, v3+10000                ; bin: heap_oob f3 41 0f 11 ae 00002710
+    ; asm: movss %xmm10, -10000(%rax)
+    [-]                 store.f32 v101, v2-10000                ; bin: heap_oob f3 44 0f 11 90 ffffd8f0

    ; Spill / Fill.

-    ; asm: movd %xmm5, 1032(%rsp)
-    [-,ss1]             v200 = spill v100                       ; bin: 66 0f 7e ac 24 00000408
-    ; asm: movd %xmm10, 1032(%rsp)
-    [-,ss1]             v201 = spill v101                       ; bin: 66 44 0f 7e 94 24 00000408
+    ; asm: movss %xmm5, 1032(%rsp)
+    [-,ss1]             v200 = spill v100                       ; bin: f3 0f 11 ac 24 00000408
+    ; asm: movss %xmm10, 1032(%rsp)
+    [-,ss1]             v201 = spill v101                       ; bin: f3 44 0f 11 94 24 00000408

-    ; asm: movd 1032(%rsp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: 66 0f 6e ac 24 00000408
-    ; asm: movd 1032(%rsp), %xmm10
-    [-,%xmm10]          v211 = fill v201                        ; bin: 66 44 0f 6e 94 24 00000408
+    ; asm: movss 1032(%rsp), %xmm5
+    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 10 ac 24 00000408
+    ; asm: movss 1032(%rsp), %xmm10
+    [-,%xmm10]          v211 = fill v201                        ; bin: f3 44 0f 10 94 24 00000408

-    ; asm: movd %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: 66 0f 7e ac 24 00000408
-    ; asm: movd 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: 66 0f 6e ac 24 00000408
+    ; asm: movss %xmm5, 1032(%rsp)
+    regspill v100, %xmm5 -> ss1                                 ; bin: f3 0f 11 ac 24 00000408
+    ; asm: movss 1032(%rsp), %xmm5
+    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 10 ac 24 00000408

    ; Comparisons.
    ;
@@ -235,11 +235,11 @@ ebb0:
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 44 0f 2e d5 0f 96 c2

    ; asm: ucomiss %xmm10, %xmm5
-    [-,%eflags]         v310 = ffcmp v10, v11                   ; bin: 41 0f 2e ea
+    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 41 0f 2e ea
    ; asm: ucomiss %xmm10, %xmm5
-    [-,%eflags]         v311 = ffcmp v11, v10                   ; bin: 44 0f 2e d5
+    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 44 0f 2e d5
    ; asm: ucomiss %xmm5, %xmm5
-    [-,%eflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed
+    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed

    return
 }
@@ -392,52 +392,52 @@ ebb0:

    ; Load/Store

-    ; asm: movq (%r14), %xmm5
-    [-,%xmm5]           v100 = load.f64 v3                      ; bin: f3 41 0f 7e 2e
-    ; asm: movq (%rax), %xmm10
-    [-,%xmm10]          v101 = load.f64 v2                      ; bin: f3 44 0f 7e 10
-    ; asm: movq 50(%r14), %xmm5
-    [-,%xmm5]           v110 = load.f64 v3+50                   ; bin: f3 41 0f 7e 6e 32
-    ; asm: movq -50(%rax), %xmm10
-    [-,%xmm10]          v111 = load.f64 v2-50                   ; bin: f3 44 0f 7e 50 ce
-    ; asm: movq 10000(%r14), %xmm5
-    [-,%xmm5]           v120 = load.f64 v3+10000                ; bin: f3 41 0f 7e ae 00002710
-    ; asm: movq -10000(%rax), %xmm10
-    [-,%xmm10]          v121 = load.f64 v2-10000                ; bin: f3 44 0f 7e 90 ffffd8f0
+    ; asm: movsd (%r14), %xmm5
+    [-,%xmm5]           v100 = load.f64 v3                      ; bin: heap_oob f2 41 0f 10 2e
+    ; asm: movsd (%rax), %xmm10
+    [-,%xmm10]          v101 = load.f64 v2                      ; bin: heap_oob f2 44 0f 10 10
+    ; asm: movsd 50(%r14), %xmm5
+    [-,%xmm5]           v110 = load.f64 v3+50                   ; bin: heap_oob f2 41 0f 10 6e 32
+    ; asm: movsd -50(%rax), %xmm10
+    [-,%xmm10]          v111 = load.f64 v2-50                   ; bin: heap_oob f2 44 0f 10 50 ce
+    ; asm: movsd 10000(%r14), %xmm5
+    [-,%xmm5]           v120 = load.f64 v3+10000                ; bin: heap_oob f2 41 0f 10 ae 00002710
+    ; asm: movsd -10000(%rax), %xmm10
+    [-,%xmm10]          v121 = load.f64 v2-10000                ; bin: heap_oob f2 44 0f 10 90 ffffd8f0

-    ; asm: movq %xmm5, (%r14)
-    [-]                 store.f64 v100, v3                      ; bin: 66 41 0f d6 2e
-    ; asm: movq %xmm10, (%rax)
-    [-]                 store.f64 v101, v2                      ; bin: 66 44 0f d6 10
-    ; asm: movq %xmm5, (%r13)
-    [-]                 store.f64 v100, v4                      ; bin: 66 41 0f d6 6d 00
-    ; asm: movq %xmm10, (%r13)
-    [-]                 store.f64 v101, v4                      ; bin: 66 45 0f d6 55 00
-    ; asm: movq %xmm5, 50(%r14)
-    [-]                 store.f64 v100, v3+50                   ; bin: 66 41 0f d6 6e 32
-    ; asm: movq %xmm10, -50(%rax)
-    [-]                 store.f64 v101, v2-50                   ; bin: 66 44 0f d6 50 ce
-    ; asm: movq %xmm5, 10000(%r14)
-    [-]                 store.f64 v100, v3+10000                ; bin: 66 41 0f d6 ae 00002710
-    ; asm: movq %xmm10, -10000(%rax)
-    [-]                 store.f64 v101, v2-10000                ; bin: 66 44 0f d6 90 ffffd8f0
+    ; asm: movsd %xmm5, (%r14)
+    [-]                 store.f64 v100, v3                      ; bin: heap_oob f2 41 0f 11 2e
+    ; asm: movsd %xmm10, (%rax)
+    [-]                 store.f64 v101, v2                      ; bin: heap_oob f2 44 0f 11 10
+    ; asm: movsd %xmm5, (%r13)
+    [-]                 store.f64 v100, v4                      ; bin: heap_oob f2 41 0f 11 6d 00
+    ; asm: movsd %xmm10, (%r13)
+    [-]                 store.f64 v101, v4                      ; bin: heap_oob f2 45 0f 11 55 00
+    ; asm: movsd %xmm5, 50(%r14)
+    [-]                 store.f64 v100, v3+50                   ; bin: heap_oob f2 41 0f 11 6e 32
+    ; asm: movsd %xmm10, -50(%rax)
+    [-]                 store.f64 v101, v2-50                   ; bin: heap_oob f2 44 0f 11 50 ce
+    ; asm: movsd %xmm5, 10000(%r14)
+    [-]                 store.f64 v100, v3+10000                ; bin: heap_oob f2 41 0f 11 ae 00002710
+    ; asm: movsd %xmm10, -10000(%rax)
+    [-]                 store.f64 v101, v2-10000                ; bin: heap_oob f2 44 0f 11 90 ffffd8f0

    ; Spill / Fill.

-    ; asm: movq %xmm5, 1032(%rsp)
-    [-,ss1]             v200 = spill v100                       ; bin: 66 0f d6 ac 24 00000408
-    ; asm: movq %xmm10, 1032(%rsp)
-    [-,ss1]             v201 = spill v101                       ; bin: 66 44 0f d6 94 24 00000408
+    ; asm: movsd %xmm5, 1032(%rsp)
+    [-,ss1]             v200 = spill v100                       ; bin: f2 0f 11 ac 24 00000408
+    ; asm: movsd %xmm10, 1032(%rsp)
+    [-,ss1]             v201 = spill v101                       ; bin: f2 44 0f 11 94 24 00000408

-    ; asm: movq 1032(%rsp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 7e ac 24 00000408
-    ; asm: movq 1032(%rsp), %xmm10
-    [-,%xmm10]          v211 = fill v201                        ; bin: f3 44 0f 7e 94 24 00000408
+    ; asm: movsd 1032(%rsp), %xmm5
+    [-,%xmm5]           v210 = fill v200                        ; bin: f2 0f 10 ac 24 00000408
+    ; asm: movsd 1032(%rsp), %xmm10
+    [-,%xmm10]          v211 = fill v201                        ; bin: f2 44 0f 10 94 24 00000408

-    ; asm: movq %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: 66 0f d6 ac 24 00000408
-    ; asm: movq 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 7e ac 24 00000408
+    ; asm: movsd %xmm5, 1032(%rsp)
+    regspill v100, %xmm5 -> ss1                                 ; bin: f2 0f 11 ac 24 00000408
+    ; asm: movsd 1032(%rsp), %xmm5
+    regfill v100, ss1 -> %xmm5                                  ; bin: f2 0f 10 ac 24 00000408

    ; Comparisons.
    ;
@@ -470,11 +470,11 @@ ebb0:
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 66 44 0f 2e d5 0f 96 c2

    ; asm: ucomisd %xmm10, %xmm5
-    [-,%eflags]         v310 = ffcmp v10, v11                   ; bin: 66 41 0f 2e ea
+    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 66 41 0f 2e ea
    ; asm: ucomisd %xmm10, %xmm5
-    [-,%eflags]         v311 = ffcmp v11, v10                   ; bin: 66 44 0f 2e d5
+    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 66 44 0f 2e d5
    ; asm: ucomisd %xmm5, %xmm5
-    [-,%eflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed
+    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed

    return
 }
@@ -482,7 +482,7 @@ ebb0:
 function %cpuflags_float(f32 [%xmm0]) {
 ebb0(v0: f32 [%xmm0]):
    ; asm: ucomiss %xmm0, %xmm0
-    [-,%eflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0
+    [-,%rflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0

    jump ebb1

@@ -505,21 +505,21 @@ ebb1:
    brff ule v1, ebb1                                           ; bin: 76 f0

    ; asm: jp .+4; ud2
-    trapff ord v1, user0                                        ; bin: 7a 02 0f 0b
+    trapff ord v1, user0                                        ; bin: 7a 02 user0 0f 0b
    ; asm: jnp .+4; ud2
-    trapff uno v1, user0                                        ; bin: 7b 02 0f 0b
+    trapff uno v1, user0                                        ; bin: 7b 02 user0 0f 0b
    ; asm: je .+4; ud2
-    trapff one v1, user0                                        ; bin: 74 02 0f 0b
+    trapff one v1, user0                                        ; bin: 74 02 user0 0f 0b
    ; asm: jne .+4; ud2
-    trapff ueq v1, user0                                        ; bin: 75 02 0f 0b
+    trapff ueq v1, user0                                        ; bin: 75 02 user0 0f 0b
    ; asm: jna .+4; ud2
-    trapff gt v1, user0                                         ; bin: 76 02 0f 0b
+    trapff gt v1, user0                                         ; bin: 76 02 user0 0f 0b
    ; asm: jnae .+4; ud2
-    trapff ge v1, user0                                         ; bin: 72 02 0f 0b
+    trapff ge v1, user0                                         ; bin: 72 02 user0 0f 0b
    ; asm: jnb .+4; ud2
-    trapff ult v1, user0                                        ; bin: 73 02 0f 0b
+    trapff ult v1, user0                                        ; bin: 73 02 user0 0f 0b
    ; asm: jnbe .+4; ud2
-    trapff ule v1, user0                                        ; bin: 77 02 0f 0b
+    trapff ule v1, user0                                        ; bin: 77 02 user0 0f 0b

    ; asm: setnp %bl
    [-,%rbx]            v10 = trueff ord v1                     ; bin: 0f 9b c3
--- a/cranelift/filetests/isa/intel/binary64.cton
+++ b/cranelift/filetests/isa/intel/binary64.cton
@@ -1,4 +1,4 @@
-; binary emission of 64-bit code.
+; binary emission of x86-64 code.
 test binemit
 set is_64bit
 set is_compressed
@@ -38,6 +38,11 @@ ebb0:
    ; asm: movq $0xffffffff88001122, %r14                     # 32-bit sign-extended constant.
    [-,%r14]            v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122

+    ; asm: movl $1, %ecx
+    [-,%rcx]            v9007 = bconst.b1 true      ; bin: b9 00000001
+    ; asm: movl $1, %r10d
+    [-,%r10]            v9008 = bconst.b1 true      ; bin: 41 ba 00000001
+
    ; Integer Register-Register Operations.

    ; asm: addq %rsi, %rcx
@@ -170,146 +175,146 @@ ebb0:
    ; Register indirect addressing with no displacement.

    ; asm: movq %rcx, (%r10)
-    store v1, v3                                ; bin: 49 89 0a
+    store v1, v3                                ; bin: heap_oob 49 89 0a
    ; asm: movq %r10, (%rcx)
-    store v3, v1                                ; bin: 4c 89 11
+    store v3, v1                                ; bin: heap_oob 4c 89 11
    ; asm: movl %ecx, (%r10)
-    istore32 v1, v3                             ; bin: 41 89 0a
+    istore32 v1, v3                             ; bin: heap_oob 41 89 0a
    ; asm: movl %r10d, (%rcx)
-    istore32 v3, v1                             ; bin: 44 89 11
+    istore32 v3, v1                             ; bin: heap_oob 44 89 11
    ; asm: movw %cx, (%r10)
-    istore16 v1, v3                             ; bin: 66 41 89 0a
+    istore16 v1, v3                             ; bin: heap_oob 66 41 89 0a
    ; asm: movw %r10w, (%rcx)
-    istore16 v3, v1                             ; bin: 66 44 89 11
+    istore16 v3, v1                             ; bin: heap_oob 66 44 89 11
    ; asm: movb %cl, (%r10)
-    istore8 v1, v3                              ; bin: 41 88 0a
+    istore8 v1, v3                              ; bin: heap_oob 41 88 0a
    ; asm: movb %r10b, (%rcx)
-    istore8 v3, v1                              ; bin: 44 88 11
+    istore8 v3, v1                              ; bin: heap_oob 44 88 11

    ; asm: movq (%rcx), %r14
-    [-,%r14]            v120 = load.i64 v1      ; bin: 4c 8b 31
+    [-,%r14]            v120 = load.i64 v1      ; bin: heap_oob 4c 8b 31
    ; asm: movq (%r10), %rdx
-    [-,%rdx]            v121 = load.i64 v3      ; bin: 49 8b 12
+    [-,%rdx]            v121 = load.i64 v3      ; bin: heap_oob 49 8b 12
    ; asm: movl (%rcx), %r14d
-    [-,%r14]            v122 = uload32.i64 v1   ; bin: 44 8b 31
+    [-,%r14]            v122 = uload32.i64 v1   ; bin: heap_oob 44 8b 31
    ; asm: movl (%r10), %edx
-    [-,%rdx]            v123 = uload32.i64 v3   ; bin: 41 8b 12
+    [-,%rdx]            v123 = uload32.i64 v3   ; bin: heap_oob 41 8b 12
    ; asm: movslq (%rcx), %r14
-    [-,%r14]            v124 = sload32.i64 v1   ; bin: 4c 63 31
+    [-,%r14]            v124 = sload32.i64 v1   ; bin: heap_oob 4c 63 31
    ; asm: movslq (%r10), %rdx
-    [-,%rdx]            v125 = sload32.i64 v3   ; bin: 49 63 12
+    [-,%rdx]            v125 = sload32.i64 v3   ; bin: heap_oob 49 63 12
    ; asm: movzwq (%rcx), %r14
-    [-,%r14]            v126 = uload16.i64 v1   ; bin: 4c 0f b7 31
+    [-,%r14]            v126 = uload16.i64 v1   ; bin: heap_oob 4c 0f b7 31
    ; asm: movzwq (%r10), %rdx
-    [-,%rdx]            v127 = uload16.i64 v3   ; bin: 49 0f b7 12
+    [-,%rdx]            v127 = uload16.i64 v3   ; bin: heap_oob 49 0f b7 12
    ; asm: movswq (%rcx), %r14
-    [-,%r14]            v128 = sload16.i64 v1   ; bin: 4c 0f bf 31
+    [-,%r14]            v128 = sload16.i64 v1   ; bin: heap_oob 4c 0f bf 31
    ; asm: movswq (%r10), %rdx
-    [-,%rdx]            v129 = sload16.i64 v3   ; bin: 49 0f bf 12
+    [-,%rdx]            v129 = sload16.i64 v3   ; bin: heap_oob 49 0f bf 12
    ; asm: movzbq (%rcx), %r14
-    [-,%r14]            v130 = uload8.i64 v1    ; bin: 4c 0f b6 31
+    [-,%r14]            v130 = uload8.i64 v1    ; bin: heap_oob 4c 0f b6 31
    ; asm: movzbq (%r10), %rdx
-    [-,%rdx]            v131 = uload8.i64 v3    ; bin: 49 0f b6 12
+    [-,%rdx]            v131 = uload8.i64 v3    ; bin: heap_oob 49 0f b6 12
    ; asm: movsbq (%rcx), %r14
-    [-,%r14]            v132 = sload8.i64 v1    ; bin: 4c 0f be 31
+    [-,%r14]            v132 = sload8.i64 v1    ; bin: heap_oob 4c 0f be 31
    ; asm: movsbq (%r10), %rdx
-    [-,%rdx]            v133 = sload8.i64 v3    ; bin: 49 0f be 12
+    [-,%rdx]            v133 = sload8.i64 v3    ; bin: heap_oob 49 0f be 12

    ; Register-indirect with 8-bit signed displacement.

    ; asm: movq %rcx, 100(%r10)
-    store v1, v3+100                            ; bin: 49 89 4a 64
+    store v1, v3+100                            ; bin: heap_oob 49 89 4a 64
    ; asm: movq %r10, -100(%rcx)
-    store v3, v1-100                            ; bin: 4c 89 51 9c
+    store v3, v1-100                            ; bin: heap_oob 4c 89 51 9c
    ; asm: movl %ecx, 100(%r10)
-    istore32 v1, v3+100                         ; bin: 41 89 4a 64
+    istore32 v1, v3+100                         ; bin: heap_oob 41 89 4a 64
    ; asm: movl %r10d, -100(%rcx)
-    istore32 v3, v1-100                         ; bin: 44 89 51 9c
+    istore32 v3, v1-100                         ; bin: heap_oob 44 89 51 9c
    ; asm: movw %cx, 100(%r10)
-    istore16 v1, v3+100                         ; bin: 66 41 89 4a 64
+    istore16 v1, v3+100                         ; bin: heap_oob 66 41 89 4a 64
    ; asm: movw %r10w, -100(%rcx)
-    istore16 v3, v1-100                         ; bin: 66 44 89 51 9c
+    istore16 v3, v1-100                         ; bin: heap_oob 66 44 89 51 9c
    ; asm: movb %cl, 100(%r10)
-    istore8 v1, v3+100                          ; bin: 41 88 4a 64
+    istore8 v1, v3+100                          ; bin: heap_oob 41 88 4a 64
    ; asm: movb %r10b, 100(%rcx)
-    istore8 v3, v1+100                          ; bin: 44 88 51 64
+    istore8 v3, v1+100                          ; bin: heap_oob 44 88 51 64

    ; asm: movq 50(%rcx), %r10
-    [-,%r10]            v140 = load.i64 v1+50           ; bin: 4c 8b 51 32
+    [-,%r10]            v140 = load.i64 v1+50           ; bin: heap_oob 4c 8b 51 32
    ; asm: movq -50(%r10), %rdx
-    [-,%rdx]            v141 = load.i64 v3-50           ; bin: 49 8b 52 ce
+    [-,%rdx]            v141 = load.i64 v3-50           ; bin: heap_oob 49 8b 52 ce
    ; asm: movl 50(%rcx), %edi
-    [-,%rdi]            v142 = uload32.i64 v1+50        ; bin: 8b 79 32
+    [-,%rdi]            v142 = uload32.i64 v1+50        ; bin: heap_oob 8b 79 32
    ; asm: movl -50(%rsi), %edx
-    [-,%rdx]            v143 = uload32.i64 v2-50        ; bin: 8b 56 ce
+    [-,%rdx]            v143 = uload32.i64 v2-50        ; bin: heap_oob 8b 56 ce
    ; asm: movslq 50(%rcx), %rdi
-    [-,%rdi]            v144 = sload32.i64 v1+50        ; bin: 48 63 79 32
+    [-,%rdi]            v144 = sload32.i64 v1+50        ; bin: heap_oob 48 63 79 32
    ; asm: movslq -50(%rsi), %rdx
-    [-,%rdx]            v145 = sload32.i64 v2-50        ; bin: 48 63 56 ce
+    [-,%rdx]            v145 = sload32.i64 v2-50        ; bin: heap_oob 48 63 56 ce
    ; asm: movzwq 50(%rcx), %rdi
-    [-,%rdi]            v146 = uload16.i64 v1+50        ; bin: 48 0f b7 79 32
+    [-,%rdi]            v146 = uload16.i64 v1+50        ; bin: heap_oob 48 0f b7 79 32
    ; asm: movzwq -50(%rsi), %rdx
-    [-,%rdx]            v147 = uload16.i64 v2-50        ; bin: 48 0f b7 56 ce
+    [-,%rdx]            v147 = uload16.i64 v2-50        ; bin: heap_oob 48 0f b7 56 ce
    ; asm: movswq 50(%rcx), %rdi
-    [-,%rdi]            v148 = sload16.i64 v1+50        ; bin: 48 0f bf 79 32
+    [-,%rdi]            v148 = sload16.i64 v1+50        ; bin: heap_oob 48 0f bf 79 32
    ; asm: movswq -50(%rsi), %rdx
-    [-,%rdx]            v149 = sload16.i64 v2-50        ; bin: 48 0f bf 56 ce
+    [-,%rdx]            v149 = sload16.i64 v2-50        ; bin: heap_oob 48 0f bf 56 ce
    ; asm: movzbq 50(%rcx), %rdi
-    [-,%rdi]            v150 = uload8.i64 v1+50         ; bin: 48 0f b6 79 32
+    [-,%rdi]            v150 = uload8.i64 v1+50         ; bin: heap_oob 48 0f b6 79 32
    ; asm: movzbq -50(%rsi), %rdx
-    [-,%rdx]            v151 = uload8.i64 v2-50         ; bin: 48 0f b6 56 ce
+    [-,%rdx]            v151 = uload8.i64 v2-50         ; bin: heap_oob 48 0f b6 56 ce
    ; asm: movsbq 50(%rcx), %rdi
-    [-,%rdi]            v152 = sload8.i64 v1+50         ; bin: 48 0f be 79 32
+    [-,%rdi]            v152 = sload8.i64 v1+50         ; bin: heap_oob 48 0f be 79 32
    ; asm: movsbq -50(%rsi), %rdx
-    [-,%rdx]            v153 = sload8.i64 v2-50         ; bin: 48 0f be 56 ce
+    [-,%rdx]            v153 = sload8.i64 v2-50         ; bin: heap_oob 48 0f be 56 ce

    ; Register-indirect with 32-bit signed displacement.

    ; asm: movq %rcx, 10000(%r10)
-    store v1, v3+10000                          ; bin: 49 89 8a 00002710
+    store v1, v3+10000                          ; bin: heap_oob 49 89 8a 00002710
    ; asm: movq %r10, -10000(%rcx)
-    store v3, v1-10000                          ; bin: 4c 89 91 ffffd8f0
+    store v3, v1-10000                          ; bin: heap_oob 4c 89 91 ffffd8f0
    ; asm: movl %ecx, 10000(%rsi)
-    istore32 v1, v2+10000                       ; bin: 89 8e 00002710
+    istore32 v1, v2+10000                       ; bin: heap_oob 89 8e 00002710
    ; asm: movl %esi, -10000(%rcx)
-    istore32 v2, v1-10000                       ; bin: 89 b1 ffffd8f0
+    istore32 v2, v1-10000                       ; bin: heap_oob 89 b1 ffffd8f0
    ; asm: movw %cx, 10000(%rsi)
-    istore16 v1, v2+10000                       ; bin: 66 89 8e 00002710
+    istore16 v1, v2+10000                       ; bin: heap_oob 66 89 8e 00002710
    ; asm: movw %si, -10000(%rcx)
-    istore16 v2, v1-10000                       ; bin: 66 89 b1 ffffd8f0
+    istore16 v2, v1-10000                       ; bin: heap_oob 66 89 b1 ffffd8f0
    ; asm: movb %cl, 10000(%rsi)
-    istore8 v1, v2+10000                        ; bin: 88 8e 00002710
+    istore8 v1, v2+10000                        ; bin: heap_oob 88 8e 00002710
    ; asm: movb %sil, 10000(%rcx)
-    istore8 v2, v1+10000                        ; bin: 40 88 b1 00002710
+    istore8 v2, v1+10000                        ; bin: heap_oob 40 88 b1 00002710

    ; asm: movq 50000(%rcx), %r10
-    [-,%r10]            v160 = load.i64 v1+50000           ; bin: 4c 8b 91 0000c350
+    [-,%r10]            v160 = load.i64 v1+50000           ; bin: heap_oob 4c 8b 91 0000c350
    ; asm: movq -50000(%r10), %rdx
-    [-,%rdx]            v161 = load.i64 v3-50000           ; bin: 49 8b 92 ffff3cb0
+    [-,%rdx]            v161 = load.i64 v3-50000           ; bin: heap_oob 49 8b 92 ffff3cb0
    ; asm: movl 50000(%rcx), %edi
-    [-,%rdi]            v162 = uload32.i64 v1+50000        ; bin: 8b b9 0000c350
+    [-,%rdi]            v162 = uload32.i64 v1+50000        ; bin: heap_oob 8b b9 0000c350
    ; asm: movl -50000(%rsi), %edx
-    [-,%rdx]            v163 = uload32.i64 v2-50000        ; bin: 8b 96 ffff3cb0
+    [-,%rdx]            v163 = uload32.i64 v2-50000        ; bin: heap_oob 8b 96 ffff3cb0
    ; asm: movslq 50000(%rcx), %rdi
-    [-,%rdi]            v164 = sload32.i64 v1+50000        ; bin: 48 63 b9 0000c350
+    [-,%rdi]            v164 = sload32.i64 v1+50000        ; bin: heap_oob 48 63 b9 0000c350
    ; asm: movslq -50000(%rsi), %rdx
-    [-,%rdx]            v165 = sload32.i64 v2-50000        ; bin: 48 63 96 ffff3cb0
+    [-,%rdx]            v165 = sload32.i64 v2-50000        ; bin: heap_oob 48 63 96 ffff3cb0
    ; asm: movzwq 50000(%rcx), %rdi
-    [-,%rdi]            v166 = uload16.i64 v1+50000        ; bin: 48 0f b7 b9 0000c350
+    [-,%rdi]            v166 = uload16.i64 v1+50000        ; bin: heap_oob 48 0f b7 b9 0000c350
    ; asm: movzwq -50000(%rsi), %rdx
-    [-,%rdx]            v167 = uload16.i64 v2-50000        ; bin: 48 0f b7 96 ffff3cb0
+    [-,%rdx]            v167 = uload16.i64 v2-50000        ; bin: heap_oob 48 0f b7 96 ffff3cb0
    ; asm: movswq 50000(%rcx), %rdi
-    [-,%rdi]            v168 = sload16.i64 v1+50000        ; bin: 48 0f bf b9 0000c350
+    [-,%rdi]            v168 = sload16.i64 v1+50000        ; bin: heap_oob 48 0f bf b9 0000c350
    ; asm: movswq -50000(%rsi), %rdx
-    [-,%rdx]            v169 = sload16.i64 v2-50000        ; bin: 48 0f bf 96 ffff3cb0
+    [-,%rdx]            v169 = sload16.i64 v2-50000        ; bin: heap_oob 48 0f bf 96 ffff3cb0
    ; asm: movzbq 50000(%rcx), %rdi
-    [-,%rdi]            v170 = uload8.i64 v1+50000         ; bin: 48 0f b6 b9 0000c350
+    [-,%rdi]            v170 = uload8.i64 v1+50000         ; bin: heap_oob 48 0f b6 b9 0000c350
    ; asm: movzbq -50000(%rsi), %rdx
-    [-,%rdx]            v171 = uload8.i64 v2-50000         ; bin: 48 0f b6 96 ffff3cb0
+    [-,%rdx]            v171 = uload8.i64 v2-50000         ; bin: heap_oob 48 0f b6 96 ffff3cb0
    ; asm: movsbq 50000(%rcx), %rdi
-    [-,%rdi]            v172 = sload8.i64 v1+50000         ; bin: 48 0f be b9 0000c350
+    [-,%rdi]            v172 = sload8.i64 v1+50000         ; bin: heap_oob 48 0f be b9 0000c350
    ; asm: movsbq -50000(%rsi), %rdx
-    [-,%rdx]            v173 = sload8.i64 v2-50000         ; bin: 48 0f be 96 ffff3cb0
+    [-,%rdx]            v173 = sload8.i64 v2-50000         ; bin: heap_oob 48 0f be 96 ffff3cb0


    ; More arithmetic.
@@ -324,17 +329,17 @@ ebb0:
    [-,%rax]      v190 = iconst.i64 1
    [-,%rdx]      v191 = iconst.i64 2
    ; asm: idivq %rcx
-    [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1  ; bin: 48 f7 f9
+    [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1  ; bin: int_divz 48 f7 f9
    ; asm: idivq %rsi
-    [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2  ; bin: 48 f7 fe
+    [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2  ; bin: int_divz 48 f7 fe
    ; asm: idivq %r10
-    [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3  ; bin: 49 f7 fa
+    [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3  ; bin: int_divz 49 f7 fa
    ; asm: divq %rcx
-    [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1  ; bin: 48 f7 f1
+    [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1  ; bin: int_divz 48 f7 f1
    ; asm: divq %rsi
-    [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2  ; bin: 48 f7 f6
+    [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2  ; bin: int_divz 48 f7 f6
    ; asm: divq %r10
-    [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3  ; bin: 49 f7 f2
+    [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3  ; bin: int_divz 49 f7 f2

    ; double-length multiply instructions, 64 bit
    [-,%rax]       v1001 = iconst.i64 1
@@ -453,6 +458,14 @@ ebb0:
    ; asm: setbe %dl
    [-,%rdx]            v319 = icmp ule v2, v3  ; bin: 4c 39 d6 0f 96 c2

+    ; asm: cmpq $37, %rcx
+    ; asm: setl %bl
+    [-,%rbx]            v320 = icmp_imm slt v1, 37     ; bin: 48 83 f9 25 0f 9c c3
+
+    ; asm: cmpq $100000, %rcx
+    ; asm: setl %bl
+    [-,%rbx]            v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3
+
    ; Bool-to-int conversions.

    ; asm: movzbq %bl, %rcx
@@ -529,6 +542,21 @@ ebb0:
    ; asm: addq $-2147483648, %rsp
    adjust_sp_imm -2147483648                   ; bin: 48 81 c4 80000000

+    ; Shift immediates
+    ; asm: shlq $12, %rsi
+    [-,%rsi]             v515 = ishl_imm v2, 12   ; bin: 48 c1 e6 0c
+    ; asm: shlq $13, %r8
+    [-,%r8]              v516 = ishl_imm v4, 13   ; bin: 49 c1 e0 0d
+    ; asm: sarq $32, %rsi
+    [-,%rsi]             v517 = sshr_imm v2, 32   ; bin: 48 c1 fe 20
+    ; asm: sarq $33, %r8
+    [-,%r8]              v518 = sshr_imm v4, 33   ; bin: 49 c1 f8 21
+    ; asm: shrq $62, %rsi
+    [-,%rsi]             v519 = ushr_imm v2, 62   ; bin: 48 c1 ee 3e
+    ; asm: shrq $63, %r8
+    [-,%r8]              v520 = ushr_imm v4, 63   ; bin: 49 c1 e8 3f
+
+
    ; asm: testq %rcx, %rcx
    ; asm: je ebb1
    brz v1, ebb1                                ; bin: 48 85 c9 74 1b
@@ -569,9 +597,9 @@ ebb0:

 ebb1:
    ; asm: cmpq %r10, %rcx
-    [-,%eflags]         v10 = ifcmp v1, v2      ; bin: 4c 39 d1
+    [-,%rflags]         v10 = ifcmp v1, v2      ; bin: 4c 39 d1
    ; asm: cmpq %rcx, %r10
-    [-,%eflags]         v11 = ifcmp v2, v1      ; bin: 49 39 ca
+    [-,%rflags]         v11 = ifcmp v2, v1      ; bin: 49 39 ca

    ; asm: je ebb1
    brif eq v11, ebb1                           ; bin: 74 f8
@@ -617,41 +645,42 @@ ebb1:

    ; The trapif instructions are encoded as macros: a conditional jump over a ud2.
    ; asm: jne .+4; ud2
-    trapif eq v11, user0                           ; bin: 75 02 0f 0b
+    trapif eq v11, user0                           ; bin: 75 02 user0 0f 0b
    ; asm: je .+4; ud2
-    trapif ne v11, user0                           ; bin: 74 02 0f 0b
+    trapif ne v11, user0                           ; bin: 74 02 user0 0f 0b
    ; asm: jnl .+4; ud2
-    trapif slt v11, user0                          ; bin: 7d 02 0f 0b
+    trapif slt v11, user0                          ; bin: 7d 02 user0 0f 0b
    ; asm: jnge .+4; ud2
-    trapif sge v11, user0                          ; bin: 7c 02 0f 0b
+    trapif sge v11, user0                          ; bin: 7c 02 user0 0f 0b
    ; asm: jng .+4; ud2
-    trapif sgt v11, user0                          ; bin: 7e 02 0f 0b
+    trapif sgt v11, user0                          ; bin: 7e 02 user0 0f 0b
    ; asm: jnle .+4; ud2
-    trapif sle v11, user0                          ; bin: 7f 02 0f 0b
+    trapif sle v11, user0                          ; bin: 7f 02 user0 0f 0b
    ; asm: jnb .+4; ud2
-    trapif ult v11, user0                          ; bin: 73 02 0f 0b
+    trapif ult v11, user0                          ; bin: 73 02 user0 0f 0b
    ; asm: jnae .+4; ud2
-    trapif uge v11, user0                          ; bin: 72 02 0f 0b
+    trapif uge v11, user0                          ; bin: 72 02 user0 0f 0b
    ; asm: jna .+4; ud2
-    trapif ugt v11, user0                          ; bin: 76 02 0f 0b
+    trapif ugt v11, user0                          ; bin: 76 02 user0 0f 0b
    ; asm: jnbe .+4; ud2
-    trapif ule v11, user0                          ; bin: 77 02 0f 0b
+    trapif ule v11, user0                          ; bin: 77 02 user0 0f 0b

    ; Stack check.
    ; asm: cmpq %rsp, %rcx
-    [-,%eflags]         v40 = ifcmp_sp v1       ; bin: 48 39 e1
+    [-,%rflags]         v40 = ifcmp_sp v1       ; bin: 48 39 e1
    ; asm: cmpq %rsp, %r10
-    [-,%eflags]         v41 = ifcmp_sp v2       ; bin: 49 39 e2
+    [-,%rflags]         v41 = ifcmp_sp v2       ; bin: 49 39 e2

    ; asm: cmpq $-100, %rcx
-    [-,%eflags]         v522 = ifcmp_imm v1, -100   ; bin: 48 83 f9 9c
+    [-,%rflags]         v522 = ifcmp_imm v1, -100   ; bin: 48 83 f9 9c
    ; asm: cmpq $100, %r10
-    [-,%eflags]         v523 = ifcmp_imm v2, 100    ; bin: 49 83 fa 64
+    [-,%rflags]         v523 = ifcmp_imm v2, 100    ; bin: 49 83 fa 64

    ; asm: cmpq $-10000, %rcx
-    [-,%eflags]         v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0
+    [-,%rflags]         v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0
    ; asm: cmpq $10000, %r10
-    [-,%eflags]         v525 = ifcmp_imm v2, 10000  ; bin: 49 81 fa 00002710
+    [-,%rflags]         v525 = ifcmp_imm v2, 10000  ; bin: 49 81 fa 00002710
+

    return
 }
@@ -708,71 +737,71 @@ ebb0:
    ; Register indirect addressing with no displacement.

    ; asm: movl (%rcx), %edi
-    [-,%rdi]            v10 = load.i32 v1      ; bin: 8b 39
+    [-,%rdi]            v10 = load.i32 v1      ; bin: heap_oob 8b 39
    ; asm: movl (%rsi), %edx
-    [-,%rdx]            v11 = load.i32 v2      ; bin: 8b 16
+    [-,%rdx]            v11 = load.i32 v2      ; bin: heap_oob 8b 16
    ; asm: movzwl (%rcx), %edi
-    [-,%rdi]            v12 = uload16.i32 v1   ; bin: 0f b7 39
+    [-,%rdi]            v12 = uload16.i32 v1   ; bin: heap_oob 0f b7 39
    ; asm: movzwl (%rsi), %edx
-    [-,%rdx]            v13 = uload16.i32 v2   ; bin: 0f b7 16
+    [-,%rdx]            v13 = uload16.i32 v2   ; bin: heap_oob 0f b7 16
    ; asm: movswl (%rcx), %edi
-    [-,%rdi]            v14 = sload16.i32 v1   ; bin: 0f bf 39
+    [-,%rdi]            v14 = sload16.i32 v1   ; bin: heap_oob 0f bf 39
    ; asm: movswl (%rsi), %edx
-    [-,%rdx]            v15 = sload16.i32 v2   ; bin: 0f bf 16
+    [-,%rdx]            v15 = sload16.i32 v2   ; bin: heap_oob 0f bf 16
    ; asm: movzbl (%rcx), %edi
-    [-,%rdi]            v16 = uload8.i32 v1    ; bin: 0f b6 39
+    [-,%rdi]            v16 = uload8.i32 v1    ; bin: heap_oob 0f b6 39
    ; asm: movzbl (%rsi), %edx
-    [-,%rdx]            v17 = uload8.i32 v2    ; bin: 0f b6 16
+    [-,%rdx]            v17 = uload8.i32 v2    ; bin: heap_oob 0f b6 16
    ; asm: movsbl (%rcx), %edi
-    [-,%rdi]            v18 = sload8.i32 v1    ; bin: 0f be 39
+    [-,%rdi]            v18 = sload8.i32 v1    ; bin: heap_oob 0f be 39
    ; asm: movsbl (%rsi), %edx
-    [-,%rdx]            v19 = sload8.i32 v2    ; bin: 0f be 16
+    [-,%rdx]            v19 = sload8.i32 v2    ; bin: heap_oob 0f be 16

    ; Register-indirect with 8-bit signed displacement.

    ; asm: movl 50(%rcx), %edi
-    [-,%rdi]            v20 = load.i32 v1+50           ; bin: 8b 79 32
+    [-,%rdi]            v20 = load.i32 v1+50           ; bin: heap_oob 8b 79 32
    ; asm: movl -50(%rsi), %edx
-    [-,%rdx]            v21 = load.i32 v2-50           ; bin: 8b 56 ce
+    [-,%rdx]            v21 = load.i32 v2-50           ; bin: heap_oob 8b 56 ce
    ; asm: movzwl 50(%rcx), %edi
-    [-,%rdi]            v22 = uload16.i32 v1+50        ; bin: 0f b7 79 32
+    [-,%rdi]            v22 = uload16.i32 v1+50        ; bin: heap_oob 0f b7 79 32
    ; asm: movzwl -50(%rsi), %edx
-    [-,%rdx]            v23 = uload16.i32 v2-50        ; bin: 0f b7 56 ce
+    [-,%rdx]            v23 = uload16.i32 v2-50        ; bin: heap_oob 0f b7 56 ce
    ; asm: movswl 50(%rcx), %edi
-    [-,%rdi]            v24 = sload16.i32 v1+50        ; bin: 0f bf 79 32
+    [-,%rdi]            v24 = sload16.i32 v1+50        ; bin: heap_oob 0f bf 79 32
    ; asm: movswl -50(%rsi), %edx
-    [-,%rdx]            v25 = sload16.i32 v2-50        ; bin: 0f bf 56 ce
+    [-,%rdx]            v25 = sload16.i32 v2-50        ; bin: heap_oob 0f bf 56 ce
    ; asm: movzbl 50(%rcx), %edi
-    [-,%rdi]            v26 = uload8.i32 v1+50         ; bin: 0f b6 79 32
+    [-,%rdi]            v26 = uload8.i32 v1+50         ; bin: heap_oob 0f b6 79 32
    ; asm: movzbl -50(%rsi), %edx
-    [-,%rdx]            v27 = uload8.i32 v2-50         ; bin: 0f b6 56 ce
+    [-,%rdx]            v27 = uload8.i32 v2-50         ; bin: heap_oob 0f b6 56 ce
    ; asm: movsbl 50(%rcx), %edi
-    [-,%rdi]            v28 = sload8.i32 v1+50         ; bin: 0f be 79 32
+    [-,%rdi]            v28 = sload8.i32 v1+50         ; bin: heap_oob 0f be 79 32
    ; asm: movsbl -50(%rsi), %edx
-    [-,%rdx]            v29 = sload8.i32 v2-50         ; bin: 0f be 56 ce
+    [-,%rdx]            v29 = sload8.i32 v2-50         ; bin: heap_oob 0f be 56 ce

    ; Register-indirect with 32-bit signed displacement.

    ; asm: movl 50000(%rcx), %edi
-    [-,%rdi]            v30 = load.i32 v1+50000           ; bin: 8b b9 0000c350
+    [-,%rdi]            v30 = load.i32 v1+50000           ; bin: heap_oob 8b b9 0000c350
    ; asm: movl -50000(%rsi), %edx
-    [-,%rdx]            v31 = load.i32 v2-50000           ; bin: 8b 96 ffff3cb0
+    [-,%rdx]            v31 = load.i32 v2-50000           ; bin: heap_oob 8b 96 ffff3cb0
    ; asm: movzwl 50000(%rcx), %edi
-    [-,%rdi]            v32 = uload16.i32 v1+50000        ; bin: 0f b7 b9 0000c350
+    [-,%rdi]            v32 = uload16.i32 v1+50000        ; bin: heap_oob 0f b7 b9 0000c350
    ; asm: movzwl -50000(%rsi), %edx
-    [-,%rdx]            v33 = uload16.i32 v2-50000        ; bin: 0f b7 96 ffff3cb0
+    [-,%rdx]            v33 = uload16.i32 v2-50000        ; bin: heap_oob 0f b7 96 ffff3cb0
    ; asm: movswl 50000(%rcx), %edi
-    [-,%rdi]            v34 = sload16.i32 v1+50000        ; bin: 0f bf b9 0000c350
+    [-,%rdi]            v34 = sload16.i32 v1+50000        ; bin: heap_oob 0f bf b9 0000c350
    ; asm: movswl -50000(%rsi), %edx
-    [-,%rdx]            v35 = sload16.i32 v2-50000        ; bin: 0f bf 96 ffff3cb0
+    [-,%rdx]            v35 = sload16.i32 v2-50000        ; bin: heap_oob 0f bf 96 ffff3cb0
    ; asm: movzbl 50000(%rcx), %edi
-    [-,%rdi]            v36 = uload8.i32 v1+50000         ; bin: 0f b6 b9 0000c350
+    [-,%rdi]            v36 = uload8.i32 v1+50000         ; bin: heap_oob 0f b6 b9 0000c350
    ; asm: movzbl -50000(%rsi), %edx
-    [-,%rdx]            v37 = uload8.i32 v2-50000         ; bin: 0f b6 96 ffff3cb0
+    [-,%rdx]            v37 = uload8.i32 v2-50000         ; bin: heap_oob 0f b6 96 ffff3cb0
    ; asm: movsbl 50000(%rcx), %edi
-    [-,%rdi]            v38 = sload8.i32 v1+50000         ; bin: 0f be b9 0000c350
+    [-,%rdi]            v38 = sload8.i32 v1+50000         ; bin: heap_oob 0f be b9 0000c350
    ; asm: movsbl -50000(%rsi), %edx
-    [-,%rdx]            v39 = sload8.i32 v2-50000         ; bin: 0f be 96 ffff3cb0
+    [-,%rdx]            v39 = sload8.i32 v2-50000         ; bin: heap_oob 0f be 96 ffff3cb0

    ; Integer Register-Register Operations.

@@ -903,17 +932,17 @@ ebb0:
    [-,%rax]      v160 = iconst.i32 1
    [-,%rdx]      v161 = iconst.i32 2
    ; asm: idivl %ecx
-    [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1  ; bin: f7 f9
+    [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1  ; bin: int_divz f7 f9
    ; asm: idivl %esi
-    [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2  ; bin: f7 fe
+    [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2  ; bin: int_divz f7 fe
    ; asm: idivl %r10d
-    [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3  ; bin: 41 f7 fa
+    [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3  ; bin: int_divz 41 f7 fa
    ; asm: divl %ecx
-    [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1  ; bin: f7 f1
+    [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1  ; bin: int_divz f7 f1
    ; asm: divl %esi
-    [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2  ; bin: f7 f6
+    [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2  ; bin: int_divz f7 f6
    ; asm: divl %r10d
-    [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3  ; bin: 41 f7 f2
+    [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3  ; bin: int_divz 41 f7 f2

    ; Bit-counting instructions.

@@ -1010,6 +1039,14 @@ ebb0:
    ; asm: setbe %dl
    [-,%rdx]            v319 = icmp ule v2, v3  ; bin: 44 39 d6 0f 96 c2

+    ; asm: cmpl $37, %ecx
+    ; asm: setl %bl
+    [-,%rbx]            v320 = icmp_imm slt v1, 37  ; bin: 83 f9 25 0f 9c c3
+
+    ; asm: cmpl $100000, %ecx
+    ; asm: setl %bl
+    [-,%rbx]            v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3
+
    ; Bool-to-int conversions.

    ; asm: movzbl %bl, %ecx
@@ -1039,19 +1076,32 @@ ebb0:
    regfill v1, ss1 -> %rcx                     ; bin: 8b 8c 24 00000408

    ; asm: cmpl %esi, %ecx
-    [-,%eflags]         v520 = ifcmp v1, v2      ; bin: 39 f1
+    [-,%rflags]         v520 = ifcmp v1, v2      ; bin: 39 f1
    ; asm: cmpl %r10d, %esi
-    [-,%eflags]         v521 = ifcmp v2, v3      ; bin: 44 39 d6
+    [-,%rflags]         v521 = ifcmp v2, v3      ; bin: 44 39 d6

    ; asm: cmpl $-100, %ecx
-    [-,%eflags]         v522 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
+    [-,%rflags]         v522 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
    ; asm: cmpl $100, %r10d
-    [-,%eflags]         v523 = ifcmp_imm v3, 100    ; bin: 41 83 fa 64
+    [-,%rflags]         v523 = ifcmp_imm v3, 100    ; bin: 41 83 fa 64

    ; asm: cmpl $-10000, %ecx
-    [-,%eflags]         v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
+    [-,%rflags]         v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
    ; asm: cmpl $10000, %r10d
-    [-,%eflags]         v525 = ifcmp_imm v3, 10000  ; bin: 41 81 fa 00002710
+    [-,%rflags]         v525 = ifcmp_imm v3, 10000  ; bin: 41 81 fa 00002710
+
+    ; asm: shll $2, %esi
+    [-,%rsi]             v526 = ishl_imm v2, 2    ; bin: c1 e6 02
+    ; asm: shll $12, %r10d
+    [-,%r10]             v527 = ishl_imm v3, 12   ; bin: 41 c1 e2 0c
+    ; asm: sarl $5, %esi
+    [-,%rsi]             v529 = sshr_imm v2, 5    ; bin: c1 fe 05
+    ; asm: sarl $32, %r10d
+    [-,%r10]             v530 = sshr_imm v3, 32   ; bin: 41 c1 fa 20
+    ; asm: shrl $8, %esi
+    [-,%rsi]             v532 = ushr_imm v2, 8    ; bin: c1 ee 08
+    ; asm: shrl $31, %r10d
+    [-,%r10]             v533 = ushr_imm v3, 31   ; bin: 41 c1 ea 1f

    ; asm: testl %ecx, %ecx
    ; asm: je ebb1x
@@ -1082,6 +1132,7 @@ ebb1:
    ; asm: ebb2x:
 ebb2:
    jump ebb1                                   ; bin: eb fd
+
 }

 ; Tests for i32/i8 conversion instructions.
@@ -1109,7 +1160,7 @@ ebb0:
    ; asm: movzbl %r10b, %ecx
    [-,%rcx]            v32 = uextend.i32 v13           ; bin: 41 0f b6 ca

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i32/i16 conversion instructions.
@@ -1137,7 +1188,7 @@ ebb0:
    ; asm: movzwl %r10w, %ecx
    [-,%rcx]            v32 = uextend.i32 v13           ; bin: 41 0f b7 ca

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i64/i8 conversion instructions.
@@ -1165,7 +1216,7 @@ ebb0:
    ; asm: movzbl %r10b, %ecx
    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 41 0f b6 ca

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i64/i16 conversion instructions.
@@ -1193,7 +1244,7 @@ ebb0:
    ; asm: movzwl %r10w, %ecx
    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 41 0f b7 ca

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }

 ; Tests for i64/i32 conversion instructions.
@@ -1221,5 +1272,5 @@ ebb0:
    ; asm: movl %r10d, %ecx
    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 44 89 d1

-    trap user0                                          ; bin: 0f 0b
+    trap user0                                          ; bin: user0 0f 0b
 }
--- a/cranelift/filetests/isa/intel/legalize-div-traps.cton
+++ b/cranelift/filetests/isa/intel/legalize-div-traps.cton
@@ -40,7 +40,7 @@ ebb0(v0: i64, v1: i64):
    ; nextln: brif eq $fm1, $(m1=$EBB)
    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
    ; nextln: trapif eq $fz, int_divz
-    ; check: $(hi=$V) = sshr
+    ; check: $(hi=$V) = sshr_imm
    ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
    ; nextln: jump $(done=$EBB)($q)
    ; check: $m1:
@@ -60,7 +60,7 @@ ebb0(v0: i64, v1: i64):
    v2 = srem v0, v1
    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
    ; nextln: brif eq $fm1, $(m1=$EBB)
-    ; check: $(hi=$V) = sshr
+    ; check: $(hi=$V) = sshr_imm
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
    ; nextln: jump $(done=$EBB)($r)
    ; check: $m1:
--- a/cranelift/filetests/isa/intel/legalize-div.cton
+++ b/cranelift/filetests/isa/intel/legalize-div.cton
@@ -32,7 +32,7 @@ function %sdiv(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: ebb0(
    v2 = sdiv v0, v1
-    ; check: $(hi=$V) = sshr
+    ; check: $(hi=$V) = sshr_imm
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
    return v2
    ; nextln: return $d
@@ -46,7 +46,7 @@ ebb0(v0: i64, v1: i64):
    v2 = srem v0, v1
    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
    ; nextln: brif eq $fm1, $(m1=$EBB)
-    ; check: $(hi=$V) = sshr
+    ; check: $(hi=$V) = sshr_imm
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
    ; nextln: jump $(done=$EBB)($r)
    ; check: $m1:
--- a/cranelift/filetests/isa/intel/legalize-libcall.cton
+++ b/cranelift/filetests/isa/intel/legalize-libcall.cton
@@ -9,7 +9,7 @@ ebb0(v0: f32):
    v1 = floor v0
    return v1
 }
-; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] native {
-; check: sig0 = (f32) -> f32 native
+; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] system_v {
+; check: sig0 = (f32) -> f32 system_v
 ; check: fn0 = sig0 %FloorF32
 ; check: v1 = call fn0(v0)
--- a/cranelift/filetests/isa/intel/legalize-memory.cton
+++ b/cranelift/filetests/isa/intel/legalize-memory.cton
@@ -23,7 +23,7 @@ function %deref(i64 vmctx) -> i64 {
 ebb1(v1: i64):
    v2 = global_addr.i64 gv2
    ; check: $(a1=$V) = iadd_imm v1, -16
-    ; check: $(p1=$V) = load.i64 $a1
+    ; check: $(p1=$V) = load.i64 notrap aligned $a1
    ; check: v2 = iadd_imm $p1, 32
    return v2
    ; check: return v2
@@ -55,7 +55,7 @@ ebb0(v0: i32, v999: i64):
    ; Checks here are assuming that no peephole opts fold the load offsets.
    ; nextln: $(xoff=$V) = uextend.i64 v0
    ; nextln: $(haddr=$V) = iadd_imm v999, 64
-    ; nextln: $(hbase=$V) = load.i64 $haddr
+    ; nextln: $(hbase=$V) = load.i64 notrap aligned $haddr
    ; nextln: v1 = iadd $hbase, $xoff
    v2 = load.f32 v1+16
    ; nextln: v2 = load.f32 v1+16
@@ -103,7 +103,7 @@ ebb0(v0: i32, v999: i64):
    ; Checks here are assuming that no peephole opts fold the load offsets.
    ; nextln: $(xoff=$V) = uextend.i64 v0
    ; nextln: $(haddr=$V) = iadd_imm.i64 v999, 64
-    ; nextln: $(hbase=$V) = load.i64 $haddr
+    ; nextln: $(hbase=$V) = load.i64 notrap aligned $haddr
    ; nextln: v1 = iadd $hbase, $xoff
    v2 = load.f32 v1+0x7fff_ffff
    ; nextln: v2 = load.f32 v1+0x7fff_ffff
--- a/cranelift/filetests/isa/intel/prologue-epilogue.cton
+++ b/cranelift/filetests/isa/intel/prologue-epilogue.cton
@@ -9,7 +9,7 @@ ebb0:
    return
 }

-; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] native {
+; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
 ; nextln:     ss0 = explicit_slot 168, offset -224
 ; nextln:     ss1 = incoming_arg 56, offset -56
 ; check: ebb0(v0: i64 [%rbp], v1: i64 [%rbx], v2: i64 [%r12], v3: i64 [%r13], v4: i64 [%r14], v5: i64 [%r15]):
--- a/cranelift/filetests/isa/riscv/abi-e.cton
+++ b/cranelift/filetests/isa/riscv/abi-e.cton
@@ -7,8 +7,8 @@ isa riscv enable_e
 function %f() {
    ; Spilling into the stack args after %x15 since %x16 and up are not
    ; available in RV32E.
-    sig0 = (i64, i64, i64, i64) -> i64 native
-    ; check: sig0 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [0], i32 [4]) -> i32 [%x10], i32 [%x11] native
+    sig0 = (i64, i64, i64, i64) -> i64 system_v
+    ; check: sig0 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [0], i32 [4]) -> i32 [%x10], i32 [%x11] system_v
 ebb0:
    return
 }
--- a/cranelift/filetests/isa/riscv/abi.cton
+++ b/cranelift/filetests/isa/riscv/abi.cton
@@ -5,27 +5,27 @@ isa riscv
 ; regex: V=v\d+

 function %f() {
-    sig0 = (i32) -> i32 native
-    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] native
+    sig0 = (i32) -> i32 system_v
+    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v

-    sig1 = (i64) -> b1 native
-    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
+    sig1 = (i64) -> b1 system_v
+    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v

    ; The i64 argument must go in an even-odd register pair.
-    sig2 = (f32, i64) -> f64 native
-    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
+    sig2 = (f32, i64) -> f64 system_v
+    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v

    ; Spilling into the stack args.
-    sig3 = (f64, f64, f64, f64, f64, f64, f64, i64) -> f64 native
-    ; check: sig3 = (f64 [%f10], f64 [%f11], f64 [%f12], f64 [%f13], f64 [%f14], f64 [%f15], f64 [%f16], i32 [0], i32 [4]) -> f64 [%f10] native
+    sig3 = (f64, f64, f64, f64, f64, f64, f64, i64) -> f64 system_v
+    ; check: sig3 = (f64 [%f10], f64 [%f11], f64 [%f12], f64 [%f13], f64 [%f14], f64 [%f15], f64 [%f16], i32 [0], i32 [4]) -> f64 [%f10] system_v

    ; Splitting vectors.
-    sig4 = (i32x4) native
-    ; check: sig4 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13]) native
+    sig4 = (i32x4) system_v
+    ; check: sig4 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13]) system_v

    ; Splitting vectors, then splitting ints.
-    sig5 = (i64x4) native
-    ; check: sig5 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [%x16], i32 [%x17]) native
+    sig5 = (i64x4) system_v
+    ; check: sig5 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [%x16], i32 [%x17]) system_v

 ebb0:
    return
--- a/cranelift/filetests/isa/riscv/legalize-abi.cton
+++ b/cranelift/filetests/isa/riscv/legalize-abi.cton
@@ -106,7 +106,7 @@ ebb0(v0: i64x4):
 }

 function %indirect(i32) {
-    sig1 = () native
+    sig1 = () system_v
 ebb0(v0: i32):
    call_indirect sig1, v0()
    return
@@ -114,7 +114,7 @@ ebb0(v0: i32):

 ; The first argument to call_indirect doesn't get altered.
 function %indirect_arg(i32, f32x2) {
-    sig1 = (f32x2) native
+    sig1 = (f32x2) system_v
 ebb0(v0: i32, v1: f32x2):
    call_indirect sig1, v0(v1)
    ; check: call_indirect sig1, v0($V, $V)
--- a/cranelift/filetests/isa/riscv/parse-encoding.cton
+++ b/cranelift/filetests/isa/riscv/parse-encoding.cton
@@ -3,32 +3,32 @@ test legalizer
 isa riscv

 function %parse_encoding(i32 [%x5]) -> i32 [%x10] {
-    ; check: function %parse_encoding(i32 [%x5], i32 link [%x1]) -> i32 [%x10], i32 link [%x1] native {
+    ; check: function %parse_encoding(i32 [%x5], i32 link [%x1]) -> i32 [%x10], i32 link [%x1] system_v {

-    sig0 = (i32 [%x10]) -> i32 [%x10] native
-    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] native
+    sig0 = (i32 [%x10]) -> i32 [%x10] system_v
+    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v

-    sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
-    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
+    sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v
+    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v

-    sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
-    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
+    sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v
+    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v

    ; Arguments on stack where not necessary
-    sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] native
-    ; check: sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] native
+    sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v
+    ; check: sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v

    ; Stack argument before register argument
-    sig4 = (f32 [72], i32 [%x10]) native
-    ; check: sig4 = (f32 [72], i32 [%x10]) native
+    sig4 = (f32 [72], i32 [%x10]) system_v
+    ; check: sig4 = (f32 [72], i32 [%x10]) system_v

    ; Return value on stack
-    sig5 = () -> f32 [0] native
-    ; check: sig5 = () -> f32 [0] native
+    sig5 = () -> f32 [0] system_v
+    ; check: sig5 = () -> f32 [0] system_v

    ; function + signature
-    fn0 = function %bar(i32 [%x10]) -> b1 [%x10] native
-    ; check: sig6 = (i32 [%x10]) -> b1 [%x10] native
+    fn0 = function %bar(i32 [%x10]) -> b1 [%x10] system_v
+    ; check: sig6 = (i32 [%x10]) -> b1 [%x10] system_v
    ; nextln: fn0 = sig6 %bar

 ebb0(v0: i32):
--- a/cranelift/filetests/licm/complex.cton
+++ b/cranelift/filetests/licm/complex.cton
@@ -1,6 +1,6 @@
 test licm

-function %complex(i32) -> i32 native {
+function %complex(i32) -> i32 system_v {
 ebb0(v0: i32):
    jump ebb1(v0)

--- a/cranelift/filetests/licm/reject.cton
+++ b/cranelift/filetests/licm/reject.cton
@@ -0,0 +1,81 @@
+test licm
+
+function %other_side_effects(i32) -> i32 {
+
+ebb0(v0: i32):
+    jump ebb1(v0)
+
+ebb1(v1: i32):
+    regmove.i32 v0, %10 -> %20
+; check: ebb1(v1: i32):
+; check: regmove.i32 v0, %10 -> %20
+    v2 = iconst.i32 1
+    brz v1, ebb2(v1)
+    v5 = isub v1, v2
+    jump ebb1(v5)
+
+ebb2(v6: i32):
+    return v6
+
+}
+
+function %cpu_flags(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    jump ebb1(v0, v1)
+
+ebb1(v2: i32, v3: i32):
+    v4 = ifcmp.i32 v0, v1
+    v5 = selectif.i32 eq v4, v2, v3
+; check: ebb1(v2: i32, v3: i32):
+; check: ifcmp.i32 v0, v1
+; check: v5 = selectif.i32 eq v4, v2, v3
+    v8 = iconst.i32 1
+    brz v1, ebb2(v1)
+    v9 = isub v1, v8
+    v10 = iadd v1, v8
+    jump ebb1(v9, v10)
+
+ebb2(v6: i32):
+    return v6
+}
+
+function %spill(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+    v2 = spill.i32 v0
+    jump ebb1(v0, v1)
+
+ebb1(v3: i32, v4: i32):
+    v5 = spill.i32 v1
+    v6 = fill.i32 v2
+    v7 = fill.i32 v5
+; check: ebb1(v3: i32, v4: i32):
+; check: v5 = spill.i32 v1
+; check: v6 = fill.i32 v2
+; check: v7 = fill v5
+    brz v1, ebb2(v1)
+    v9 = isub v1, v4
+    jump ebb1(v9, v3)
+
+ebb2(v10: i32):
+    return v10
+}
+
+function %non_invariant_aliases(i32) -> i32 {
+
+ebb0(v0: i32):
+    jump ebb1(v0)
+
+ebb1(v1: i32):
+    v8 -> v1
+    v9 -> v1
+    v2 = iadd v8, v9
+; check: ebb1(v1: i32):
+; check: v2 = iadd v8, v9
+    brz v1, ebb2(v1)
+    v5 = isub v1, v2
+    jump ebb1(v5)
+
+ebb2(v6: i32):
+    return v6
+
+}
--- a/cranelift/filetests/parser/branch.cton
+++ b/cranelift/filetests/parser/branch.cton
@@ -9,7 +9,7 @@ ebb0:
 ebb1:
    jump ebb0()
 }
-; sameln: function %minimal() native {
+; sameln: function %minimal() system_v {
 ; nextln: ebb0:
 ; nextln:     jump ebb1
 ; nextln: 
@@ -25,7 +25,7 @@ ebb0(v90: i32):
 ebb1(v91: i32):
    jump ebb0(v91)
 }
-; sameln: function %onearg(i32) native {
+; sameln: function %onearg(i32) system_v {
 ; nextln: ebb0(v90: i32):
 ; nextln:     jump ebb1(v90)
 ; nextln: 
@@ -41,7 +41,7 @@ ebb0(v90: i32, v91: f32):
 ebb1(v92: i32, v93: f32):
    jump ebb0(v92, v93)
 }
-; sameln: function %twoargs(i32, f32) native {
+; sameln: function %twoargs(i32, f32) system_v {
 ; nextln: ebb0(v90: i32, v91: f32):
 ; nextln:     jump ebb1(v90, v91)
 ; nextln: 
@@ -57,7 +57,7 @@ ebb0(v90: i32):
 ebb1:
    brnz v90, ebb1()
 }
-; sameln: function %minimal(i32) native {
+; sameln: function %minimal(i32) system_v {
 ; nextln: ebb0(v90: i32):
 ; nextln:     brz v90, ebb1
 ; nextln: 
@@ -72,7 +72,7 @@ ebb0(v90: i32, v91: f32):
 ebb1(v92: i32, v93: f32):
    brnz v90, ebb0(v92, v93)
 }
-; sameln: function %twoargs(i32, f32) native {
+; sameln: function %twoargs(i32, f32) system_v {
 ; nextln: ebb0(v90: i32, v91: f32):
 ; nextln:     brz v90, ebb1(v90, v91)
 ; nextln: 
@@ -94,7 +94,7 @@ ebb30:
 ebb40:
    trap user4
 }
-; sameln: function %jumptable(i32) native {
+; sameln: function %jumptable(i32) system_v {
 ; check:      jt2 = jump_table 0, 0, ebb10, ebb40, ebb20, ebb30
 ; check:      jt200 = jump_table 0
 ; check:  ebb10(v3: i32):
--- a/cranelift/filetests/parser/call.cton
+++ b/cranelift/filetests/parser/call.cton
@@ -5,7 +5,7 @@ function %mini() {
 ebb1:
    return
 }
-; sameln: function %mini() native {
+; sameln: function %mini() system_v {
 ; nextln: ebb1:
 ; nextln:     return
 ; nextln: }
@@ -29,10 +29,10 @@ function %signatures() {
    fn5 = sig11 %foo
    fn8 = function %bar(i32) -> b1
 }
-; sameln: function %signatures() native {
-; check:      sig10 = () native
+; sameln: function %signatures() system_v {
+; check:      sig10 = () system_v
 ; check:      sig11 = (i32, f64) -> i32, b1 spiderwasm
-; check:      sig12 = (i32) -> b1 native
+; check:      sig12 = (i32) -> b1 system_v
 ; not:        fn0
 ; check:      fn5 = sig11 %foo
 ; check:      fn8 = sig12 %bar
@@ -88,7 +88,7 @@ function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32
 ebb0(v1: i32, v2: i32, v3: i32, v4: i32):
    return v4, v2, v3, v1
 }
-; check: function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret native {
+; check: function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret system_v {
 ; check: ebb0(v1: i32, v2: i32, v3: i32, v4: i32):
 ; check:     return v4, v2, v3, v1
 ; check: }
--- a/cranelift/filetests/parser/instruction_encoding.cton
+++ b/cranelift/filetests/parser/instruction_encoding.cton
@@ -13,7 +13,7 @@ ebb1(v0: i32 [%x8], v1: i32):
@55 v9 = iadd v8, v7
@a5 [Iret#5] return v0, v8
 }
-; sameln: function %foo(i32, i32) native {
+; sameln: function %foo(i32, i32) system_v {
 ; nextln: ebb1(v0: i32 [%x8], v1: i32):
 ; nextln:     [-,-]$WS v2 = iadd v0, v1
 ; nextln:     [-]$WS trap heap_oob
--- a/cranelift/filetests/parser/keywords.cton
+++ b/cranelift/filetests/parser/keywords.cton
@@ -2,4 +2,4 @@ test cat

 ; 'function' is not a keyword, and can be used as the name of a function too.
 function %function() {}
-; check: function %function() native
+; check: function %function() system_v
--- a/cranelift/filetests/parser/rewrite.cton
+++ b/cranelift/filetests/parser/rewrite.cton
@@ -9,7 +9,7 @@ ebb100(v20: i32):
    v9200 = f64const 0x4.0p0
    trap user4
 }
-; sameln: function %defs() native {
+; sameln: function %defs() system_v {
 ; nextln: ebb100(v20: i32):
 ; nextln:     v1000 = iconst.i32x8 5
 ; nextln:     v9200 = f64const 0x1.0000000000000p2
@@ -23,7 +23,7 @@ ebb100(v20: i32):
    v200 = iadd v20, v1000
    jump ebb100(v1000)
 }
-; sameln: function %use_value() native {
+; sameln: function %use_value() system_v {
 ; nextln: ebb100(v20: i32):
 ; nextln:     v1000 = iadd_imm v20, 5
 ; nextln:     v200 = iadd v20, v1000
--- a/cranelift/filetests/parser/tiny.cton
+++ b/cranelift/filetests/parser/tiny.cton
@@ -5,7 +5,7 @@ function %minimal() {
 ebb0:
    trap user0
 }
-; sameln: function %minimal() native {
+; sameln: function %minimal() system_v {
 ; nextln: ebb0:
 ; nextln:     trap user0
 ; nextln: }
@@ -18,7 +18,7 @@ ebb0:
    v1 = iconst.i8 6
    v2 = ishl v0, v1
 }
-; sameln: function %ivalues() native {
+; sameln: function %ivalues() system_v {
 ; nextln: ebb0:
 ; nextln:     v0 = iconst.i32 2
 ; nextln:     v1 = iconst.i8 6
@@ -34,7 +34,7 @@ ebb0:
    v2 = bextend.b32 v1
    v3 = bxor v0, v2
 }
-; sameln: function %bvalues() native {
+; sameln: function %bvalues() system_v {
 ; nextln: ebb0:
 ; nextln:     v0 = bconst.b32 true
 ; nextln:     v1 = bconst.b8 false
@@ -47,17 +47,17 @@ function %select() {
 ebb0(v90: i32, v91: i32, v92: b1):
    v0 = select v92, v90, v91
 }
-; sameln: function %select() native {
+; sameln: function %select() system_v {
 ; nextln: ebb0(v90: i32, v91: i32, v92: b1):
 ; nextln:     v0 = select v92, v90, v91
 ; nextln: }

 ; Polymorphic instruction controlled by third operand.
-function %selectif() native {
+function %selectif() system_v {
 ebb0(v95: i32, v96: i32, v97: b1):
    v98 = selectif.i32 eq v97, v95, v96
 }
-; sameln: function %selectif() native {
+; sameln: function %selectif() system_v {
 ; nextln: ebb0(v95: i32, v96: i32, v97: b1):
 ; nextln: v98 = selectif.i32 eq v97, v95, v96
 ; nextln: }
@@ -69,7 +69,7 @@ ebb0:
    v1 = extractlane v0, 3
    v2 = insertlane v0, 1, v1
 }
-; sameln: function %lanes() native {
+; sameln: function %lanes() system_v {
 ; nextln: ebb0:
 ; nextln:     v0 = iconst.i32x4 2
 ; nextln:     v1 = extractlane v0, 3
@@ -85,7 +85,7 @@ ebb0(v90: i32, v91: i32):
    v3 = irsub_imm v91, 45
    br_icmp eq v90, v91, ebb0(v91, v90)
 }
-; sameln: function %icmp(i32, i32) native {
+; sameln: function %icmp(i32, i32) system_v {
 ; nextln: ebb0(v90: i32, v91: i32):
 ; nextln:     v0 = icmp eq v90, v91
 ; nextln:     v1 = icmp ult v90, v91
@@ -101,7 +101,7 @@ ebb0(v90: f32, v91: f32):
    v1 = fcmp uno v90, v91
    v2 = fcmp lt v90, v91
 }
-; sameln: function %fcmp(f32, f32) native {
+; sameln: function %fcmp(f32, f32) system_v {
 ; nextln: ebb0(v90: f32, v91: f32):
 ; nextln:     v0 = fcmp eq v90, v91
 ; nextln:     v1 = fcmp uno v90, v91
@@ -115,7 +115,7 @@ ebb0(v90: i32, v91: f32):
    v0 = bitcast.i8x4 v90
    v1 = bitcast.i32 v91
 }
-; sameln: function %bitcast(i32, f32) native {
+; sameln: function %bitcast(i32, f32) system_v {
 ; nextln: ebb0(v90: i32, v91: f32):
 ; nextln:     v0 = bitcast.i8x4 v90
 ; nextln:     v1 = bitcast.i32 v91
@@ -135,7 +135,7 @@ ebb0:
    stack_store v1, ss10+2
    stack_store v2, ss2
 }
-; sameln: function %stack() native {
+; sameln: function %stack() system_v {
 ; check:     ss2 = explicit_slot 4
 ; check:     ss3 = incoming_arg 4, offset 8
 ; check:     ss4 = outgoing_arg 4
@@ -162,7 +162,7 @@ ebb0(v1: i32):
    store aligned v3, v1+12
    store notrap aligned v3, v1-12
 }
-; sameln: function %memory(i32) native {
+; sameln: function %memory(i32) system_v {
 ; nextln: ebb0(v1: i32):
 ; nextln:     v2 = load.i64 v1
 ; nextln:     v3 = load.i64 aligned v1
@@ -187,7 +187,7 @@ ebb0(v1: i32):
    regfill v1, ss0 -> %10
    return
 }
-; sameln: function %diversion(i32) native {
+; sameln: function %diversion(i32) system_v {
 ; nextln:     ss0 = spill_slot 4
 ; check: ebb0(v1: i32):
 ; nextln:     regmove v1, %10 -> %20
@@ -204,7 +204,7 @@ ebb0:
    copy_special %20 -> %10
    return
 }
-; sameln: function %copy_special() native {
+; sameln: function %copy_special() system_v {
 ; nextln: ebb0:
 ; nextln:     copy_special %10 -> %20
 ; nextln:     copy_special %20 -> %10
--- a/cranelift/filetests/postopt/basic.cton
+++ b/cranelift/filetests/postopt/basic.cton
@@ -0,0 +1,100 @@
+test postopt
+isa intel
+
+; Test that compare+branch sequences are folded effectively on x86.
+
+function %br_icmp(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+[Op1icscc#39,%rdx]  v2 = icmp slt v0, v1
+[Op1t8jccd_long#85] brnz v2, ebb1
+[Op1ret#c3]         return v1
+
+ebb1:
+[Op1puid#b8,%rax]   v8 = iconst.i32 3
+[Op1ret#c3]         return v8
+}
+; sameln: function %br_icmp
+; nextln: ebb0(v0: i32, v1: i32):
+; nextln:    v9 = ifcmp v0, v1
+; nextln:    v2 = trueif slt v9
+; nextln:    brif slt v9, ebb1
+; nextln:    return v1
+; nextln: 
+; nextln: ebb1:
+; nextln:    v8 = iconst.i32 3
+; nextln:    return v8
+; nextln: }
+
+; Use brz instead of brnz, so the condition is inverted.
+
+function %br_icmp_inverse(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+[Op1icscc#39,%rdx]  v2 = icmp slt v0, v1
+[Op1t8jccd_long#84] brz v2, ebb1
+[Op1ret#c3]         return v1
+
+ebb1:
+[Op1puid#b8,%rax]   v8 = iconst.i32 3
+[Op1ret#c3]         return v8
+}
+; sameln: function %br_icmp_inverse
+; nextln: ebb0(v0: i32, v1: i32):
+; nextln:    v9 = ifcmp v0, v1
+; nextln:    v2 = trueif slt v9
+; nextln:    brif sge v9, ebb1
+; nextln:    return v1
+; nextln: 
+; nextln: ebb1:
+; nextln:    v8 = iconst.i32 3
+; nextln:    return v8
+; nextln: }
+
+; Use icmp_imm instead of icmp.
+
+function %br_icmp_imm(i32, i32) -> i32 {
+ebb0(v0: i32, v1: i32):
+[Op1icsccib#7083]   v2 = icmp_imm slt v0, 2
+[Op1t8jccd_long#84] brz v2, ebb1
+[Op1ret#c3]         return v1
+
+ebb1:
+[Op1puid#b8,%rax]   v8 = iconst.i32 3
+[Op1ret#c3]         return v8
+}
+; sameln: function %br_icmp_imm
+; nextln: ebb0(v0: i32, v1: i32):
+; nextln:    v9 = ifcmp_imm v0, 2
+; nextln:    v2 = trueif slt v9
+; nextln:    brif sge v9, ebb1
+; nextln:    return v1
+; nextln: 
+; nextln: ebb1:
+; nextln:    v8 = iconst.i32 3
+; nextln:    return v8
+; nextln: }
+
+; Use fcmp instead of icmp.
+
+function %br_fcmp(f32, f32) -> f32 {
+ebb0(v0: f32, v1: f32):
+[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1
+[Op1t8jccd_long#84] brz v2, ebb1
+[Op1ret#c3]         return v1
+
+ebb1:
+[Op1puid#b8,%rax]    v18 = iconst.i32 0x40a8_0000
+[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18
+[Op1ret#c3]         return v8
+}
+; sameln: function %br_fcmp
+; nextln: ebb0(v0: f32, v1: f32):
+; nextln:    v19 = ffcmp v0, v1
+; nextln:    v2 = trueff gt v19
+; nextln:    brff ule v19, ebb1
+; nextln:    return v1
+; nextln: 
+; nextln: ebb1:
+; nextln:    v18 = iconst.i32 0x40a8_0000
+; nextln:    v8 = bitcast.f32 v18
+; nextln:    return v8
+; nextln: }
--- a/cranelift/filetests/preopt/simplify.cton
+++ b/cranelift/filetests/preopt/simplify.cton
@@ -0,0 +1,80 @@
+test preopt
+isa intel
+
+function %iadd_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = iadd v0, v1
+    return v2
+}
+; sameln: function %iadd_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = iadd_imm v0, 2
+; nextln:     return v2
+; nextln: }
+
+function %isub_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = isub v0, v1
+    return v2
+}
+; sameln: function %isub_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = iadd_imm v0, -2
+; nextln:     return v2
+; nextln: }
+
+function %icmp_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = icmp slt v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+; sameln: function %icmp_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = icmp_imm slt v0, 2
+; nextln:     v3 = bint.i32 v2
+; nextln:     return v3
+; nextln: }
+
+function %brz_bint(i32) {
+ebb0(v0: i32):
+    v3 = icmp_imm slt v0, 0
+    v1 = bint.i32 v3
+    v2 = select v1, v1, v1
+    trapz v1, user0
+    brz v1, ebb1
+    jump ebb2
+
+ebb1:
+    return
+
+ebb2:
+    return
+}
+; sameln: function %brz_bint
+; nextln: (v0: i32):
+; nextln:    v3 = icmp_imm slt v0, 0
+; nextln:    v1 = bint.i32 v3
+; nextln:    v2 = select v3, v1, v1
+; nextln:    trapz v3, user0
+; nextln:    brz v3, ebb1
+; nextln:    jump ebb2
+
+function %irsub_imm(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = isub v1, v0
+    return v2
+}
+; sameln: function %irsub_imm
+; nextln: ebb0(v0: i32):
+; nextln:     v1 = iconst.i32 2
+; nextln:     v2 = irsub_imm v1, 2
+; nextln:     return v2
+; nextln: }
--- a/cranelift/filetests/regalloc/coalesce.cton
+++ b/cranelift/filetests/regalloc/coalesce.cton
@@ -109,7 +109,7 @@ ebb1(v10: i32):
    return v11
 }

-function %gvn_unremovable_phi(i32) native {
+function %gvn_unremovable_phi(i32) system_v {
 ebb0(v0: i32):
    v2 = iconst.i32 0
    jump ebb2(v2, v0)
--- a/cranelift/filetests/regalloc/coalescing-207.cton
+++ b/cranelift/filetests/regalloc/coalescing-207.cton
@@ -5,12 +5,12 @@ isa intel haswell
 ; Reported as https://github.com/Cretonne/cretonne/issues/207
 ;
 ; The coalescer creates a virtual register with two interfering values.
-function %pr207(i64 vmctx, i32, i32) -> i32 native {
+function %pr207(i64 vmctx, i32, i32) -> i32 system_v {
    gv0 = vmctx-8
    heap0 = static gv0, min 0, bound 0x5000, guard 0x0040_0000
-    sig0 = (i64 vmctx, i32, i32) -> i32 native
-    sig1 = (i64 vmctx, i32, i32, i32) -> i32 native
-    sig2 = (i64 vmctx, i32, i32, i32) -> i32 native
+    sig0 = (i64 vmctx, i32, i32) -> i32 system_v
+    sig1 = (i64 vmctx, i32, i32, i32) -> i32 system_v
+    sig2 = (i64 vmctx, i32, i32, i32) -> i32 system_v
    fn0 = sig0 u0:2
    fn1 = sig1 u0:0
    fn2 = sig2 u0:1
@@ -1034,10 +1034,10 @@ ebb92(v767: i32):
 }

 ; Same problem from musl.wasm.
-function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] native {
+function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] system_v {
    gv0 = vmctx
    heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000
-    sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] native
+    sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] system_v
    fn0 = sig0 u0:517

 ebb0(v0: f64, v1: i64):
--- a/cranelift/filetests/regalloc/coalescing-216.cton
+++ b/cranelift/filetests/regalloc/coalescing-216.cton
@@ -5,7 +5,7 @@ isa intel haswell
 ; Reported as https://github.com/Cretonne/cretonne/issues/216 from the Binaryen fuzzer.
 ;
 ; The (old) coalescer creates a virtual register with two identical values.
-function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] native {
+function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v {
 ebb0(v0: i32, v1: i64):
    v3 = iconst.i64 0
    v5 = iconst.i32 0
--- a/cranelift/filetests/regalloc/coloring-227.cton
+++ b/cranelift/filetests/regalloc/coloring-227.cton
@@ -2,7 +2,7 @@ test regalloc
 set is_64bit
 isa intel haswell

-function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) native {
+function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) system_v {
    gv0 = vmctx
    heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000

@@ -21,7 +21,7 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@0011 [RexOp1puid#b8]               v9 = iconst.i32 0
@0015 [RexOp1puid#b8]               v11 = iconst.i32 0
@0017 [RexOp1icscc#39]              v12 = icmp.i32 eq v15, v11
-@0017 [RexOp2urm#4b6]               v13 = bint.i32 v12
+@0017 [RexOp2urm_noflags#4b6]       v13 = bint.i32 v12
@001a [RexOp1rr#21]                 v14 = band v9, v13
@001b [RexOp1tjccb#75]              brnz v14, ebb6
@001d [RexOp1jmpb#eb]               jump ebb7
--- a/cranelift/filetests/regalloc/ghost-param.cton
+++ b/cranelift/filetests/regalloc/ghost-param.cton
@@ -9,7 +9,7 @@ isa intel haswell
 ;
 ; Test case by binaryen fuzzer!

-function %pr215(i64 vmctx [%rdi]) native {
+function %pr215(i64 vmctx [%rdi]) system_v {
 ebb0(v0: i64):
    v10 = iconst.i64 0
    v1 = bitcast.f64 v10
--- a/cranelift/filetests/regalloc/global-fixed.cton
+++ b/cranelift/filetests/regalloc/global-fixed.cton
@@ -2,7 +2,7 @@ test regalloc
 set is_64bit=1
 isa intel haswell

-function %foo() native {
+function %foo() system_v {
 ebb4:
    v3 = iconst.i32 0
    jump ebb3
--- a/cranelift/filetests/regalloc/intel-regres.cton
+++ b/cranelift/filetests/regalloc/intel-regres.cton
@@ -11,7 +11,7 @@ isa intel
 ; This ended up confusing the constraint solver which had not made a record of
 ; the fixed register assignment for v9 since it was already in the correct
 ; register.
-function %pr147(i32) -> i32 native {
+function %pr147(i32) -> i32 system_v {
 ebb0(v0: i32):
    v1 = iconst.i32 0
    v2 = iconst.i32 1
--- a/cranelift/filetests/regalloc/output-interference.cton
+++ b/cranelift/filetests/regalloc/output-interference.cton
@@ -2,7 +2,7 @@ test regalloc
 set is_64bit=1
 isa intel haswell

-function %test(i64) -> i64 native {
+function %test(i64) -> i64 system_v {
 ebb0(v0: i64):
    v2 = iconst.i64 12
    ; This division clobbers two of its fixed input registers on Intel.
--- a/cranelift/filetests/regalloc/reload-208.cton
+++ b/cranelift/filetests/regalloc/reload-208.cton
@@ -11,11 +11,11 @@ isa intel haswell
 ;
 ; The problem was the reload pass rewriting EBB arguments on "brnz v9, ebb3(v9)"

-function %pr208(i64 vmctx [%rdi]) native {
+function %pr208(i64 vmctx [%rdi]) system_v {
    gv0 = vmctx-8
    heap0 = static gv0, min 0, bound 0x5000, guard 0x0040_0000
-    sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] native
-    sig1 = (i64 vmctx [%rdi], i32 [%rsi]) native
+    sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v
+    sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v
    fn0 = sig0 u0:1
    fn1 = sig1 u0:3

--- a/cranelift/filetests/regalloc/reload.cton
+++ b/cranelift/filetests/regalloc/reload.cton
@@ -5,7 +5,7 @@ isa riscv enable_e

 ; Check that we can handle a function return value that got spilled.
 function %spill_return() -> i32 {
-    fn0 = function %foo() -> i32 native
+    fn0 = function %foo() -> i32 system_v

 ebb0:
    v0 = call fn0()
--- a/cranelift/filetests/regalloc/schedule-moves.cton
+++ b/cranelift/filetests/regalloc/schedule-moves.cton
@@ -1,7 +1,7 @@
 test regalloc
 isa intel haswell

-function %pr165() native {
+function %pr165() system_v {
 ebb0:
    v0 = iconst.i32 0x0102_0304
    v1 = iconst.i32 0x1102_0304
@@ -19,7 +19,7 @@ ebb0:

 ; Same as above, but use so many registers that spilling is required.
 ; Note: This is also a candidate for using xchg instructions.
-function %emergency_spill() native {
+function %emergency_spill() system_v {
 ebb0:
    v0 = iconst.i32 0x0102_0304
    v1 = iconst.i32 0x1102_0304
--- a/cranelift/filetests/regalloc/spill-noregs.cton
+++ b/cranelift/filetests/regalloc/spill-noregs.cton
@@ -13,7 +13,7 @@ isa intel
 ;
 ; The spiller was not releasing register pressure for dead EBB parameters.

-function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] native {
+function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v {
 ebb0(v0: i32, v1: i64):
    v2 = iconst.i32 0
    v3 = iconst.i64 0
--- a/cranelift/filetests/regalloc/spill.cton
+++ b/cranelift/filetests/regalloc/spill.cton
@@ -93,7 +93,7 @@ ebb0(v0: i32):

 ; The same value used as indirect callee and argument.
 function %doubleuse_icall1(i32) {
-    sig0 = (i32) native
+    sig0 = (i32) system_v
 ebb0(v0: i32):
    ; not:copy
    call_indirect sig0, v0(v0)
@@ -102,7 +102,7 @@ ebb0(v0: i32):

 ; The same value used as indirect callee and two arguments.
 function %doubleuse_icall2(i32) {
-    sig0 = (i32, i32) native
+    sig0 = (i32, i32) system_v
 ebb0(v0: i32):
    ; check: $(c=$V) = copy v0
    call_indirect sig0, v0(v0, v0)
--- a/cranelift/filetests/verifier/defs_dominates_uses.cton
+++ b/cranelift/filetests/verifier/defs_dominates_uses.cton
@@ -0,0 +1,16 @@
+test verifier
+
+; Test verification that defs properly dominate uses.
+
+function %non_dominating(i32) -> i32 system_v {
+ebb0(v0: i32):
+    v1 = iadd.i32 v2, v0   ; error: uses value from non-dominating
+    v2 = iadd.i32 v1, v0
+    return v2
+}
+
+function %inst_uses_its_own_values(i32) -> i32 system_v {
+ebb0(v0: i32):
+    v1 = iadd.i32 v1, v0   ; error: uses value from itself
+    return v1
+}
--- a/cranelift/filetests/verifier/flags.cton
+++ b/cranelift/filetests/verifier/flags.cton
@@ -4,65 +4,65 @@ isa intel
 ; Simple, correct use of CPU flags.
 function %simple(i32) -> i32 {
                    ebb0(v0: i32):
-    [Op1rcmp#39]        v1 = ifcmp v0, v0
-    [Op2seti_abcd#490]  v2 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v3 = bint.i32 v2
-    [Op1ret#c3]         return v3
+    [Op1rcmp#39]              v1 = ifcmp v0, v0
+    [Op2seti_abcd#490]        v2 = trueif ugt v1
+    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
+    [Op1ret#c3]               return v3
 }

 ; Overlapping flag values of different types.
 function %overlap(i32, f32) -> i32 {
                    ebb0(v0: i32, v1: f32):
-    [Op1rcmp#39]        v2 = ifcmp v0, v0
-    [Op2fcmp#42e]       v3 = ffcmp v1, v1
-    [Op2setf_abcd#490]  v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3
-    [Op2seti_abcd#490]  v5 = trueif ugt v2
-    [Op1rr#21]          v6 = band v4, v5
-    [Op2urm_abcd#4b6]   v7 = bint.i32 v6
-    [Op1ret#c3]         return v7
+    [Op1rcmp#39]              v2 = ifcmp v0, v0
+    [Op2fcmp#42e]             v3 = ffcmp v1, v1
+    [Op2setf_abcd#490]        v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3
+    [Op2seti_abcd#490]        v5 = trueif ugt v2
+    [Op1rr#21]                v6 = band v4, v5
+    [Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6
+    [Op1ret#c3]               return v7
 }

 ; CPU flags clobbered by arithmetic.
 function %clobbered(i32) -> i32 {
                    ebb0(v0: i32):
-    [Op1rcmp#39]        v1 = ifcmp v0, v0
-    [Op1rr#01]          v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1
-    [Op2seti_abcd#490]  v3 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v4 = bint.i32 v3
-    [Op1ret#c3]         return v4
+    [Op1rcmp#39]              v1 = ifcmp v0, v0
+    [Op1rr#01]                v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1
+    [Op2seti_abcd#490]        v3 = trueif ugt v1
+    [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
+    [Op1ret#c3]               return v4
 }

 ; CPU flags not clobbered by load.
 function %live_across_load(i32) -> i32 {
                    ebb0(v0: i32):
-    [Op1rcmp#39]        v1 = ifcmp v0, v0
-    [Op1ld#8b]          v2 = load.i32 v0
-    [Op2seti_abcd#490]  v3 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v4 = bint.i32 v3
-    [Op1ret#c3]         return v4
+    [Op1rcmp#39]              v1 = ifcmp v0, v0
+    [Op1ld#8b]                v2 = load.i32 v0
+    [Op2seti_abcd#490]        v3 = trueif ugt v1
+    [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
+    [Op1ret#c3]               return v4
 }

 ; Correct use of CPU flags across EBB.
 function %live_across_ebb(i32) -> i32 {
-                    ebb0(v0: i32):
-    [Op1rcmp#39]        v1 = ifcmp v0, v0
-    [Op1jmpb#eb]        jump ebb1
-                    ebb1:
-    [Op2seti_abcd#490]  v2 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v3 = bint.i32 v2
-    [Op1ret#c3]         return v3
+                          ebb0(v0: i32):
+    [Op1rcmp#39]              v1 = ifcmp v0, v0
+    [Op1jmpb#eb]              jump ebb1
+                          ebb1:
+    [Op2seti_abcd#490]        v2 = trueif ugt v1
+    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
+    [Op1ret#c3]               return v3
 }

 function %live_across_ebb_backwards(i32) -> i32 {
-                    ebb0(v0: i32):
-    [Op1jmpb#eb]        jump ebb2
-                    ebb1:
-    [Op2seti_abcd#490]  v2 = trueif ugt v1
-    [Op2urm_abcd#4b6]   v3 = bint.i32 v2
-    [Op1ret#c3]         return v3
-                    ebb2:
-    [Op1rcmp#39]        v1 = ifcmp v0, v0
-    [Op1jmpb#eb]        jump ebb1
+                          ebb0(v0: i32):
+    [Op1jmpb#eb]              jump ebb2
+                          ebb1:
+    [Op2seti_abcd#490]        v2 = trueif ugt v1
+    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
+    [Op1ret#c3]               return v3
+                          ebb2:
+    [Op1rcmp#39]              v1 = ifcmp v0, v0
+    [Op1jmpb#eb]              jump ebb1
 }

 ; Flags live into loop.
@@ -73,4 +73,4 @@ function %live_into_loop(i32) -> i32 {
                    ebb1:
    [Op2seti_abcd#490]  v2 = trueif ugt v1
    [Op1jmpb#eb]        jump ebb1
-}
+}
--- a/cranelift/publish-all.sh
+++ b/cranelift/publish-all.sh
@@ -4,17 +4,13 @@ cd $(dirname "$0")
 topdir="$(pwd)"

 # All the cretonne-* crates have the same version number
-# The filecheck crate version is managed independently.
-version="0.3.4"
+version="0.4.1"

 # Update all of the Cargo.toml files.
 #
 # The main Cargo.toml in the top-level directory is the cretonne-tools crate which we don't publish.
 echo "Updating crate versions to $version"
 for crate in . lib/*; do
-    if [ "$crate" = "lib/filecheck" ]; then
-        continue
-    fi
    # Update the version number of this crate to $version.
    sed -i.bk -e "s/^version = .*/version = \"$version\"/" "$crate/Cargo.toml"
    # Update the required version number of any cretonne* dependencies.
@@ -31,7 +27,7 @@ cargo update

 echo git commit -a -m "\"Bump version to $version"\"
 echo git push
-for crate in filecheck cretonne frontend native reader wasm; do
+for crate in cretonne frontend native reader wasm; do
    echo cargo publish --manifest-path "lib/$crate/Cargo.toml"
 done
 echo
--- a/cranelift/src/cat.rs
+++ b/cranelift/src/cat.rs
@@ -1,16 +1,13 @@
 //! The `cat` sub-command.
 //!
-//! Read a sequence of Cretonne IL files and print them again to stdout. This has the effect of
+//! Read a sequence of Cretonne IR files and print them again to stdout. This has the effect of
 //! normalizing formatting and removing comments.

-use std::borrow::Cow;
-use cretonne::ir::Function;
-use cton_reader::{parse_functions, TestCommand};
 use CommandResult;
+use cton_reader::parse_functions;
 use utils::read_to_string;
-use filetest::subtest::{self, SubTest, Context, Result as STResult};

-pub fn run(files: Vec<String>) -> CommandResult {
+pub fn run(files: &[String]) -> CommandResult {
    for (i, f) in files.into_iter().enumerate() {
        if i != 0 {
            println!();
@@ -20,7 +17,7 @@ pub fn run(files: Vec<String>) -> CommandResult {
    Ok(())
 }

-fn cat_one(filename: String) -> CommandResult {
+fn cat_one(filename: &str) -> CommandResult {
    let buffer = read_to_string(&filename).map_err(
        |e| format!("{}: {}", filename, e),
    )?;
@@ -37,34 +34,3 @@ fn cat_one(filename: String) -> CommandResult {

    Ok(())
 }
-
-/// Object implementing the `test cat` sub-test.
-///
-/// This command is used for testing the parser and function printer. It simply parses a function
-/// and prints it out again.
-///
-/// The result is verified by filecheck.
-struct TestCat;
-
-pub fn subtest(parsed: &TestCommand) -> STResult<Box<SubTest>> {
-    assert_eq!(parsed.command, "cat");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestCat))
-    }
-}
-
-impl SubTest for TestCat {
-    fn name(&self) -> Cow<str> {
-        Cow::from("cat")
-    }
-
-    fn needs_verifier(&self) -> bool {
-        false
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> STResult<()> {
-        subtest::run_filecheck(&func.display(context.isa).to_string(), context)
-    }
-}
--- a/cranelift/src/compile.rs
+++ b/cranelift/src/compile.rs
@@ -1,14 +1,13 @@
-//! CLI tool to compile cretonne IL into native code.
-//!
-//! Reads IR files into Cretonne IL and compiles it.
+//! CLI tool to read Cretonne IR files and compile them into native code.

-use cton_reader::parse_test;
-use std::path::PathBuf;
 use cretonne::Context;
+use cretonne::print_errors::pretty_error;
 use cretonne::settings::FlagsOrIsa;
 use cretonne::{binemit, ir};
+use cton_reader::parse_test;
 use std::path::Path;
-use utils::{pretty_error, read_to_string, parse_sets_and_isa};
+use std::path::PathBuf;
+use utils::{parse_sets_and_isa, read_to_string};

 struct PrintRelocs {
    flag_print: bool,
@@ -45,26 +44,38 @@ impl binemit::RelocSink for PrintRelocs {
    }
 }

+struct PrintTraps {
+    flag_print: bool,
+}
+
+impl binemit::TrapSink for PrintTraps {
+    fn trap(&mut self, offset: binemit::CodeOffset, _srcloc: ir::SourceLoc, code: ir::TrapCode) {
+        if self.flag_print {
+            println!("trap: {} at {}", code, offset);
+        }
+    }
+}
+
 pub fn run(
    files: Vec<String>,
    flag_print: bool,
-    flag_set: Vec<String>,
-    flag_isa: String,
+    flag_set: &[String],
+    flag_isa: &str,
 ) -> Result<(), String> {
    let parsed = parse_sets_and_isa(flag_set, flag_isa)?;

    for filename in files {
        let path = Path::new(&filename);
        let name = String::from(path.as_os_str().to_string_lossy());
-        handle_module(flag_print, path.to_path_buf(), name, parsed.as_fisa())?;
+        handle_module(flag_print, &path.to_path_buf(), &name, parsed.as_fisa())?;
    }
    Ok(())
 }

 fn handle_module(
    flag_print: bool,
-    path: PathBuf,
-    name: String,
+    path: &PathBuf,
+    name: &str,
    fisa: FlagsOrIsa,
 ) -> Result<(), String> {
    let buffer = read_to_string(&path).map_err(
@@ -95,8 +106,9 @@ fn handle_module(
        // Encode the result as machine code.
        let mut mem = Vec::new();
        let mut relocs = PrintRelocs { flag_print };
+        let mut traps = PrintTraps { flag_print };
        mem.resize(size as usize, 0);
-        context.emit_to_memory(mem.as_mut_ptr(), &mut relocs, &*isa);
+        context.emit_to_memory(mem.as_mut_ptr(), &mut relocs, &mut traps, &*isa);

        if flag_print {
            print!(".byte ");
--- a/cranelift/src/cton-util.rs
+++ b/cranelift/src/cton-util.rs
@@ -1,27 +1,25 @@
-#[macro_use(dbg)]
 extern crate cretonne;
+extern crate cton_filetests;
 extern crate cton_reader;
 extern crate cton_wasm;
 extern crate docopt;
+extern crate filecheck;
 #[macro_use]
 extern crate serde_derive;
-extern crate filecheck;
-extern crate num_cpus;
 extern crate tempdir;
 extern crate term;

-use cretonne::{VERSION, timing};
+use cretonne::{timing, VERSION};
 use docopt::Docopt;
 use std::io::{self, Write};
 use std::process;

-mod utils;
-mod filetest;
 mod cat;
+mod compile;
 mod print_cfg;
 mod rsfilecheck;
+mod utils;
 mod wasm;
-mod compile;

 const USAGE: &str = "
 Cretonne code generator utility
@@ -40,12 +38,12 @@ Options:
    -T, --time-passes
                    print pass timing report
    -t, --just-decode
-                    just decode WebAssembly to Cretonne IL
+                    just decode WebAssembly to Cretonne IR
    -s, --print-size
                    prints generated code size
    -c, --check-translation
-                    just checks the correctness of Cretonne IL translated from WebAssembly
-    -p, --print     print the resulting Cretonne IL
+                    just checks the correctness of Cretonne IR translated from WebAssembly
+    -p, --print     print the resulting Cretonne IR
    -h, --help      print this help message
    --set=<set>     configure Cretonne settings
    --isa=<isa>     specify the Cretonne ISA
@@ -88,15 +86,20 @@ fn cton_util() -> CommandResult {

    // Find the sub-command to execute.
    let result = if args.cmd_test {
-        filetest::run(args.flag_verbose, args.arg_file)
+        cton_filetests::run(args.flag_verbose, &args.arg_file).map(|_time| ())
    } else if args.cmd_cat {
-        cat::run(args.arg_file)
+        cat::run(&args.arg_file)
    } else if args.cmd_filecheck {
-        rsfilecheck::run(args.arg_file, args.flag_verbose)
+        rsfilecheck::run(&args.arg_file, args.flag_verbose)
    } else if args.cmd_print_cfg {
-        print_cfg::run(args.arg_file)
+        print_cfg::run(&args.arg_file)
    } else if args.cmd_compile {
-        compile::run(args.arg_file, args.flag_print, args.flag_set, args.flag_isa)
+        compile::run(
+            args.arg_file,
+            args.flag_print,
+            &args.flag_set,
+            &args.flag_isa,
+        )
    } else if args.cmd_wasm {
        wasm::run(
            args.arg_file,
@@ -104,8 +107,8 @@ fn cton_util() -> CommandResult {
            args.flag_just_decode,
            args.flag_check_translation,
            args.flag_print,
-            args.flag_set,
-            args.flag_isa,
+            &args.flag_set,
+            &args.flag_isa,
            args.flag_print_size,
        )
    } else {
--- a/cranelift/src/filetest/binemit.rs
+++ b/cranelift/src/filetest/binemit.rs
@@ -1,305 +0,0 @@
-//! Test command for testing the binary machine code emission.
-//!
-//! The `binemit` test command generates binary machine code for every instruction in the input
-//! functions and compares the results to the expected output.
-
-use std::borrow::Cow;
-use std::collections::HashMap;
-use std::fmt::Write;
-use cretonne::binemit;
-use cretonne::dbg::DisplayList;
-use cretonne::ir;
-use cretonne::ir::entities::AnyEntity;
-use cretonne::binemit::RegDiversions;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result};
-use utils::{match_directive, pretty_error};
-
-struct TestBinEmit;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "binemit");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestBinEmit))
-    }
-}
-
-// Code sink that generates text.
-struct TextSink {
-    offset: binemit::CodeOffset,
-    text: String,
-}
-
-impl TextSink {
-    /// Create a new empty TextSink.
-    pub fn new() -> Self {
-        Self {
-            offset: 0,
-            text: String::new(),
-        }
-    }
-}
-
-
-
-impl binemit::CodeSink for TextSink {
-    fn offset(&self) -> binemit::CodeOffset {
-        self.offset
-    }
-
-    fn put1(&mut self, x: u8) {
-        write!(self.text, "{:02x} ", x).unwrap();
-        self.offset += 1;
-    }
-
-    fn put2(&mut self, x: u16) {
-        write!(self.text, "{:04x} ", x).unwrap();
-        self.offset += 2;
-    }
-
-    fn put4(&mut self, x: u32) {
-        write!(self.text, "{:08x} ", x).unwrap();
-        self.offset += 4;
-    }
-
-    fn put8(&mut self, x: u64) {
-        write!(self.text, "{:016x} ", x).unwrap();
-        self.offset += 8;
-    }
-
-    fn reloc_ebb(&mut self, reloc: binemit::Reloc, ebb_offset: binemit::CodeOffset) {
-        write!(self.text, "{}({}) ", reloc, ebb_offset).unwrap();
-    }
-
-    fn reloc_external(
-        &mut self,
-        reloc: binemit::Reloc,
-        name: &ir::ExternalName,
-        addend: binemit::Addend,
-    ) {
-        write!(
-            self.text,
-            "{}({}",
-            reloc,
-            name,
-        ).unwrap();
-        if addend != 0 {
-            write!(
-                self.text,
-                "{:+}",
-                addend,
-            ).unwrap();
-        }
-        write!(
-            self.text,
-            ") ",
-        ).unwrap();
-    }
-
-    fn reloc_jt(&mut self, reloc: binemit::Reloc, jt: ir::JumpTable) {
-        write!(self.text, "{}({}) ", reloc, jt).unwrap();
-    }
-}
-
-impl SubTest for TestBinEmit {
-    fn name(&self) -> Cow<str> {
-        Cow::from("binemit")
-    }
-
-    fn is_mutating(&self) -> bool {
-        true
-    }
-
-    fn needs_isa(&self) -> bool {
-        true
-    }
-
-    fn run(&self, func: Cow<ir::Function>, context: &Context) -> Result<()> {
-        let isa = context.isa.expect("binemit needs an ISA");
-        let encinfo = isa.encoding_info();
-        // TODO: Run a verifier pass over the code first to detect any bad encodings or missing/bad
-        // value locations. The current error reporting is just crashing...
-        let mut func = func.into_owned();
-
-        // Fix the stack frame layout so we can test spill/fill encodings.
-        let min_offset = func.stack_slots
-            .keys()
-            .map(|ss| func.stack_slots[ss].offset.unwrap())
-            .min();
-        func.stack_slots.frame_size = min_offset.map(|off| (-off) as u32);
-
-        let is_compressed = isa.flags().is_compressed();
-
-        // Give an encoding to any instruction that doesn't already have one.
-        let mut divert = RegDiversions::new();
-        for ebb in func.layout.ebbs() {
-            divert.clear();
-            for inst in func.layout.ebb_insts(ebb) {
-                if !func.encodings[inst].is_legal() {
-                    // Find an encoding that satisfies both immediate field and register
-                    // constraints.
-                    if let Some(enc) = {
-                        let mut legal_encodings = isa.legal_encodings(
-                            &func.dfg,
-                            &func.dfg[inst],
-                            func.dfg.ctrl_typevar(inst),
-                        ).filter(|e| {
-                                let recipe_constraints = &encinfo.constraints[e.recipe()];
-                                recipe_constraints.satisfied(inst, &divert, &func)
-                            });
-
-                        if is_compressed {
-                            // Get the smallest legal encoding
-                            legal_encodings.min_by_key(|&e| encinfo.bytes(e))
-                        } else {
-                            // If not using compressed, just use the first encoding.
-                            legal_encodings.next()
-                        }
-                    }
-                    {
-                        func.encodings[inst] = enc;
-                    }
-                }
-                divert.apply(&func.dfg[inst]);
-            }
-        }
-
-        // Relax branches and compute EBB offsets based on the encodings.
-        let code_size = binemit::relax_branches(&mut func, isa).map_err(|e| {
-            pretty_error(&func, context.isa, e)
-        })?;
-
-        // Collect all of the 'bin:' directives on instructions.
-        let mut bins = HashMap::new();
-        for comment in &context.details.comments {
-            if let Some(want) = match_directive(comment.text, "bin:") {
-                match comment.entity {
-                    AnyEntity::Inst(inst) => {
-                        if let Some(prev) = bins.insert(inst, want) {
-                            return Err(format!(
-                                "multiple 'bin:' directives on {}: '{}' and '{}'",
-                                func.dfg.display_inst(inst, isa),
-                                prev,
-                                want
-                            ));
-                        }
-                    }
-                    _ => {
-                        return Err(format!(
-                            "'bin:' directive on non-inst {}: {}",
-                            comment.entity,
-                            comment.text
-                        ))
-                    }
-                }
-            }
-        }
-        if bins.is_empty() {
-            return Err("No 'bin:' directives found".to_string());
-        }
-
-        // Now emit all instructions.
-        let mut sink = TextSink::new();
-        for ebb in func.layout.ebbs() {
-            divert.clear();
-            // Correct header offsets should have been computed by `relax_branches()`.
-            assert_eq!(
-                sink.offset,
-                func.offsets[ebb],
-                "Inconsistent {} header offset",
-                ebb
-            );
-            for (offset, inst, enc_bytes) in func.inst_offsets(ebb, &encinfo) {
-                assert_eq!(sink.offset, offset);
-                sink.text.clear();
-                let enc = func.encodings[inst];
-
-                // Send legal encodings into the emitter.
-                if enc.is_legal() {
-                    // Generate a better error message if output locations are not specified.
-                    if let Some(&v) = func.dfg.inst_results(inst).iter().find(|&&v| {
-                        !func.locations[v].is_assigned()
-                    })
-                    {
-                        return Err(format!(
-                            "Missing register/stack slot for {} in {}",
-                            v,
-                            func.dfg.display_inst(inst, isa)
-                        ));
-                    }
-                    let before = sink.offset;
-                    isa.emit_inst(&func, inst, &mut divert, &mut sink);
-                    let emitted = sink.offset - before;
-                    // Verify the encoding recipe sizes against the ISAs emit_inst implementation.
-                    assert_eq!(
-                        emitted,
-                        enc_bytes,
-                        "Inconsistent size for [{}] {}",
-                        encinfo.display(enc),
-                        func.dfg.display_inst(inst, isa)
-                    );
-                }
-
-                // Check against bin: directives.
-                if let Some(want) = bins.remove(&inst) {
-                    if !enc.is_legal() {
-                        // A possible cause of an unencoded instruction is a missing location for
-                        // one of the input operands.
-                        if let Some(&v) = func.dfg.inst_args(inst).iter().find(|&&v| {
-                            !func.locations[v].is_assigned()
-                        })
-                        {
-                            return Err(format!(
-                                "Missing register/stack slot for {} in {}",
-                                v,
-                                func.dfg.display_inst(inst, isa)
-                            ));
-                        }
-
-                        // Do any encodings exist?
-                        let encodings = isa.legal_encodings(
-                            &func.dfg,
-                            &func.dfg[inst],
-                            func.dfg.ctrl_typevar(inst),
-                        ).map(|e| encinfo.display(e))
-                            .collect::<Vec<_>>();
-
-                        if encodings.is_empty() {
-                            return Err(format!(
-                                "No encodings found for: {}",
-                                func.dfg.display_inst(inst, isa)
-                            ));
-                        }
-                        return Err(format!(
-                                "No matching encodings for {} in {}",
-                                func.dfg.display_inst(inst, isa),
-                                DisplayList(&encodings),
-                            ));
-                    }
-                    let have = sink.text.trim();
-                    if have != want {
-                        return Err(format!(
-                            "Bad machine code for {}: {}\nWant: {}\nGot:  {}",
-                            inst,
-                            func.dfg.display_inst(inst, isa),
-                            want,
-                            have
-                        ));
-                    }
-                }
-            }
-        }
-
-        if sink.offset != code_size {
-            return Err(format!(
-                "Expected code size {}, got {}",
-                code_size,
-                sink.offset
-            ));
-        }
-
-        Ok(())
-    }
-}
--- a/cranelift/src/filetest/compile.rs
+++ b/cranelift/src/filetest/compile.rs
@@ -1,114 +0,0 @@
-//! Test command for testing the code generator pipeline
-//!
-//! The `compile` test command runs each function through the full code generator pipeline
-
-use cretonne::binemit;
-use cretonne::ir;
-use cretonne;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result, run_filecheck};
-use std::borrow::Cow;
-use std::fmt::Write;
-use utils::pretty_error;
-
-struct TestCompile;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "compile");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestCompile))
-    }
-}
-
-impl SubTest for TestCompile {
-    fn name(&self) -> Cow<str> {
-        Cow::from("compile")
-    }
-
-    fn is_mutating(&self) -> bool {
-        true
-    }
-
-    fn needs_isa(&self) -> bool {
-        true
-    }
-
-    fn run(&self, func: Cow<ir::Function>, context: &Context) -> Result<()> {
-        let isa = context.isa.expect("compile needs an ISA");
-
-        // Create a compilation context, and drop in the function.
-        let mut comp_ctx = cretonne::Context::new();
-        comp_ctx.func = func.into_owned();
-
-        let code_size = comp_ctx.compile(isa).map_err(|e| {
-            pretty_error(&comp_ctx.func, context.isa, e)
-        })?;
-
-        dbg!(
-            "Generated {} bytes of code:\n{}",
-            code_size,
-            comp_ctx.func.display(isa)
-        );
-
-        // Verify that the returned code size matches the emitted bytes.
-        let mut sink = SizeSink { offset: 0 };
-        binemit::emit_function(
-            &comp_ctx.func,
-            |func, inst, div, sink| isa.emit_inst(func, inst, div, sink),
-            &mut sink,
-        );
-
-        if sink.offset != code_size {
-            return Err(format!(
-                "Expected code size {}, got {}",
-                code_size,
-                sink.offset
-            ));
-        }
-
-        // Run final code through filecheck.
-        let mut text = String::new();
-        write!(&mut text, "{}", &comp_ctx.func.display(Some(isa)))
-            .map_err(|e| e.to_string())?;
-        run_filecheck(&text, context)
-    }
-}
-
-// Code sink that simply counts bytes.
-struct SizeSink {
-    offset: binemit::CodeOffset,
-}
-
-impl binemit::CodeSink for SizeSink {
-    fn offset(&self) -> binemit::CodeOffset {
-        self.offset
-    }
-
-    fn put1(&mut self, _: u8) {
-        self.offset += 1;
-    }
-
-    fn put2(&mut self, _: u16) {
-        self.offset += 2;
-    }
-
-    fn put4(&mut self, _: u32) {
-        self.offset += 4;
-    }
-
-    fn put8(&mut self, _: u64) {
-        self.offset += 8;
-    }
-
-    fn reloc_ebb(&mut self, _reloc: binemit::Reloc, _ebb_offset: binemit::CodeOffset) {}
-    fn reloc_external(
-        &mut self,
-        _reloc: binemit::Reloc,
-        _name: &ir::ExternalName,
-        _addend: binemit::Addend,
-    ) {
-    }
-    fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {}
-}
--- a/cranelift/src/filetest/concurrent.rs
+++ b/cranelift/src/filetest/concurrent.rs
@@ -1,154 +0,0 @@
-//! Run tests concurrently.
-//!
-//! This module provides the `ConcurrentRunner` struct which uses a pool of threads to run tests
-//! concurrently.
-
-use cretonne::timing;
-use std::panic::catch_unwind;
-use std::path::{Path, PathBuf};
-use std::sync::mpsc::{channel, Sender, Receiver};
-use std::sync::{Arc, Mutex};
-use std::thread;
-use std::time::Duration;
-use num_cpus;
-use filetest::{TestResult, runone};
-
-// Request sent to worker threads contains jobid and path.
-struct Request(usize, PathBuf);
-
-/// Reply from worker thread,
-pub enum Reply {
-    Starting { jobid: usize, thread_num: usize },
-    Done { jobid: usize, result: TestResult },
-    Tick,
-}
-
-/// Manage threads that run test jobs concurrently.
-pub struct ConcurrentRunner {
-    // Channel for sending requests to the worker threads.
-    // The workers are sharing the receiver with an `Arc<Mutex<Receiver>>`.
-    // This is `None` when shutting down.
-    request_tx: Option<Sender<Request>>,
-
-    // Channel for receiving replies from the workers.
-    // Workers have their own `Sender`.
-    reply_rx: Receiver<Reply>,
-
-    handles: Vec<thread::JoinHandle<timing::PassTimes>>,
-}
-
-impl ConcurrentRunner {
-    /// Create a new `ConcurrentRunner` with threads spun up.
-    pub fn new() -> Self {
-        let (request_tx, request_rx) = channel();
-        let request_mutex = Arc::new(Mutex::new(request_rx));
-        let (reply_tx, reply_rx) = channel();
-
-        heartbeat_thread(reply_tx.clone());
-
-        let handles = (0..num_cpus::get())
-            .map(|num| {
-                worker_thread(num, request_mutex.clone(), reply_tx.clone())
-            })
-            .collect();
-
-        Self {
-            request_tx: Some(request_tx),
-            reply_rx,
-            handles,
-        }
-    }
-
-    /// Shut down worker threads orderly. They will finish any queued jobs first.
-    pub fn shutdown(&mut self) {
-        self.request_tx = None;
-    }
-
-    /// Join all the worker threads.
-    /// Transfer pass timings from the worker threads to the current thread.
-    pub fn join(&mut self) {
-        assert!(self.request_tx.is_none(), "must shutdown before join");
-        for h in self.handles.drain(..) {
-            match h.join() {
-                Ok(t) => timing::add_to_current(t),
-                Err(e) => println!("worker panicked: {:?}", e),
-            }
-        }
-    }
-
-    /// Add a new job to the queues.
-    pub fn put(&mut self, jobid: usize, path: &Path) {
-        self.request_tx
-            .as_ref()
-            .expect("cannot push after shutdown")
-            .send(Request(jobid, path.to_owned()))
-            .expect("all the worker threads are gone");
-    }
-
-    /// Get a job reply without blocking.
-    pub fn try_get(&mut self) -> Option<Reply> {
-        self.reply_rx.try_recv().ok()
-    }
-
-    /// Get a job reply, blocking until one is available.
-    pub fn get(&mut self) -> Option<Reply> {
-        self.reply_rx.recv().ok()
-    }
-}
-
-/// Spawn a heartbeat thread which sends ticks down the reply channel every second.
-/// This lets us implement timeouts without the not yet stable `recv_timeout`.
-fn heartbeat_thread(replies: Sender<Reply>) -> thread::JoinHandle<()> {
-    thread::Builder::new()
-        .name("heartbeat".to_string())
-        .spawn(move || while replies.send(Reply::Tick).is_ok() {
-            thread::sleep(Duration::from_secs(1));
-        })
-        .unwrap()
-}
-
-/// Spawn a worker thread running tests.
-fn worker_thread(
-    thread_num: usize,
-    requests: Arc<Mutex<Receiver<Request>>>,
-    replies: Sender<Reply>,
-) -> thread::JoinHandle<timing::PassTimes> {
-    thread::Builder::new()
-        .name(format!("worker #{}", thread_num))
-        .spawn(move || {
-            loop {
-                // Lock the mutex only long enough to extract a request.
-                let Request(jobid, path) = match requests.lock().unwrap().recv() {
-                    Err(..) => break, // TX end shut down. exit thread.
-                    Ok(req) => req,
-                };
-
-                // Tell them we're starting this job.
-                // The receiver should always be present for this as long as we have jobs.
-                replies.send(Reply::Starting { jobid, thread_num }).unwrap();
-
-                let result = catch_unwind(|| runone::run(path.as_path())).unwrap_or_else(|e| {
-                    // The test panicked, leaving us a `Box<Any>`.
-                    // Panics are usually strings.
-                    if let Some(msg) = e.downcast_ref::<String>() {
-                        Err(format!("panicked in worker #{}: {}", thread_num, msg))
-                    } else if let Some(msg) = e.downcast_ref::<&'static str>() {
-                        Err(format!("panicked in worker #{}: {}", thread_num, msg))
-                    } else {
-                        Err(format!("panicked in worker #{}", thread_num))
-                    }
-                });
-
-                if let Err(ref msg) = result {
-                    dbg!("FAIL: {}", msg);
-                }
-
-                replies.send(Reply::Done { jobid, result }).unwrap();
-            }
-
-            // Timing is accumulated independently per thread.
-            // Timings from this worker thread will be aggregated by `ConcurrentRunner::join()`.
-            timing::take_current()
-        })
-        .unwrap()
-}
--- a/cranelift/src/filetest/domtree.rs
+++ b/cranelift/src/filetest/domtree.rs
@@ -1,148 +0,0 @@
-//! Test command for verifying dominator trees.
-//!
-//! The `test domtree` test command looks for annotations on instructions like this:
-//!
-//!     jump ebb3 ; dominates: ebb3
-//!
-//! This annotation means that the jump instruction is expected to be the immediate dominator of
-//! `ebb3`.
-//!
-//! We verify that the dominator tree annotations are complete and correct.
-//!
-
-use cretonne::dominator_tree::{DominatorTree, DominatorTreePreorder};
-use cretonne::flowgraph::ControlFlowGraph;
-use cretonne::ir::Function;
-use cretonne::ir::entities::AnyEntity;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result, run_filecheck};
-use std::borrow::{Borrow, Cow};
-use std::collections::HashMap;
-use std::fmt::{self, Write};
-use std::result;
-use utils::match_directive;
-
-struct TestDomtree;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "domtree");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestDomtree))
-    }
-}
-
-impl SubTest for TestDomtree {
-    fn name(&self) -> Cow<str> {
-        Cow::from("domtree")
-    }
-
-    // Extract our own dominator tree from
-    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
-        let func = func.borrow();
-        let cfg = ControlFlowGraph::with_function(func);
-        let domtree = DominatorTree::with_function(func, &cfg);
-
-        // Build an expected domtree from the source annotations.
-        let mut expected = HashMap::new();
-        for comment in &context.details.comments {
-            if let Some(tail) = match_directive(comment.text, "dominates:") {
-                let inst = match comment.entity {
-                    AnyEntity::Inst(inst) => inst,
-                    _ => {
-                        return Err(format!(
-                            "annotation on non-inst {}: {}",
-                            comment.entity,
-                            comment.text
-                        ))
-                    }
-                };
-                for src_ebb in tail.split_whitespace() {
-                    let ebb = match context.details.map.lookup_str(src_ebb) {
-                        Some(AnyEntity::Ebb(ebb)) => ebb,
-                        _ => return Err(format!("expected defined EBB, got {}", src_ebb)),
-                    };
-
-                    // Annotations say that `inst` is the idom of `ebb`.
-                    if expected.insert(ebb, inst).is_some() {
-                        return Err(format!("multiple dominators for {}", src_ebb));
-                    }
-
-                    // Compare to computed domtree.
-                    match domtree.idom(ebb) {
-                        Some(got_inst) if got_inst != inst => {
-                            return Err(format!(
-                                "mismatching idoms for {}:\n\
-                                                want: {}, got: {}",
-                                src_ebb,
-                                inst,
-                                got_inst
-                            ));
-                        }
-                        None => {
-                            return Err(format!(
-                                "mismatching idoms for {}:\n\
-                                                want: {}, got: unreachable",
-                                src_ebb,
-                                inst
-                            ));
-                        }
-                        _ => {}
-                    }
-                }
-            }
-        }
-
-        // Now we know that everything in `expected` is consistent with `domtree`.
-        // All other EBB's should be either unreachable or the entry block.
-        for ebb in func.layout.ebbs().skip(1).filter(
-            |ebb| !expected.contains_key(ebb),
-        )
-        {
-            if let Some(got_inst) = domtree.idom(ebb) {
-                return Err(format!(
-                    "mismatching idoms for renumbered {}:\n\
-                                    want: unrechable, got: {}",
-                    ebb,
-                    got_inst
-                ));
-            }
-        }
-
-        let text = filecheck_text(func, &domtree).expect("formatting error");
-        run_filecheck(&text, context)
-    }
-}
-
-// Generate some output for filecheck testing
-fn filecheck_text(func: &Function, domtree: &DominatorTree) -> result::Result<String, fmt::Error> {
-    let mut s = String::new();
-
-    write!(s, "cfg_postorder:")?;
-    for &ebb in domtree.cfg_postorder() {
-        write!(s, " {}", ebb)?;
-    }
-    writeln!(s, "")?;
-
-    // Compute and print out a pre-order of the dominator tree.
-    writeln!(s, "domtree_preorder {{")?;
-    let mut dtpo = DominatorTreePreorder::new();
-    dtpo.compute(domtree, &func.layout);
-    let mut stack = Vec::new();
-    stack.extend(func.layout.entry_block());
-    while let Some(ebb) = stack.pop() {
-        write!(s, "    {}:", ebb)?;
-        let i = stack.len();
-        for ch in dtpo.children(ebb) {
-            write!(s, " {}", ch)?;
-            stack.push(ch);
-        }
-        writeln!(s, "")?;
-        // Reverse the children we just pushed so we'll pop them in order.
-        stack[i..].reverse();
-    }
-    writeln!(s, "}}")?;
-
-    Ok(s)
-}
--- a/cranelift/src/filetest/legalizer.rs
+++ b/cranelift/src/filetest/legalizer.rs
@@ -1,53 +0,0 @@
-//! Test command for checking the IL legalizer.
-//!
-//! The `test legalizer` test command runs each function through `legalize_function()` and sends
-//! the result to filecheck.
-
-use std::borrow::Cow;
-use cretonne;
-use cretonne::ir::Function;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result, run_filecheck};
-use std::fmt::Write;
-use utils::pretty_error;
-
-struct TestLegalizer;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "legalizer");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestLegalizer))
-    }
-}
-
-impl SubTest for TestLegalizer {
-    fn name(&self) -> Cow<str> {
-        Cow::from("legalizer")
-    }
-
-    fn is_mutating(&self) -> bool {
-        true
-    }
-
-    fn needs_isa(&self) -> bool {
-        true
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
-        let mut comp_ctx = cretonne::Context::new();
-        comp_ctx.func = func.into_owned();
-        let isa = context.isa.expect("legalizer needs an ISA");
-
-        comp_ctx.compute_cfg();
-        comp_ctx.legalize(isa).map_err(|e| {
-            pretty_error(&comp_ctx.func, context.isa, e)
-        })?;
-
-        let mut text = String::new();
-        write!(&mut text, "{}", &comp_ctx.func.display(Some(isa)))
-            .map_err(|e| e.to_string())?;
-        run_filecheck(&text, context)
-    }
-}
--- a/cranelift/src/filetest/licm.rs
+++ b/cranelift/src/filetest/licm.rs
@@ -1,53 +0,0 @@
-//! Test command for testing the LICM pass.
-//!
-//! The `licm` test command runs each function through the LICM pass after ensuring
-//! that all instructions are legal for the target.
-//!
-//! The resulting function is sent to `filecheck`.
-
-use cretonne::ir::Function;
-use cretonne;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result, run_filecheck};
-use std::borrow::Cow;
-use std::fmt::Write;
-use utils::pretty_error;
-
-struct TestLICM;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "licm");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestLICM))
-    }
-}
-
-impl SubTest for TestLICM {
-    fn name(&self) -> Cow<str> {
-        Cow::from("licm")
-    }
-
-    fn is_mutating(&self) -> bool {
-        true
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
-        // Create a compilation context, and drop in the function.
-        let mut comp_ctx = cretonne::Context::new();
-        comp_ctx.func = func.into_owned();
-
-        comp_ctx.flowgraph();
-        comp_ctx.compute_loop_analysis();
-        comp_ctx.licm(context.flags_or_isa()).map_err(|e| {
-            pretty_error(&comp_ctx.func, context.isa, Into::into(e))
-        })?;
-
-        let mut text = String::new();
-        write!(&mut text, "{}", &comp_ctx.func).map_err(
-            |e| e.to_string(),
-        )?;
-        run_filecheck(&text, context)
-    }
-}
--- a/cranelift/src/filetest/mod.rs
+++ b/cranelift/src/filetest/mod.rs
@@ -1,75 +0,0 @@
-//! File tests.
-//!
-//! This module contains the main driver for `cton-util test` as well as implementations of the
-//! available test commands.
-
-use std::path::Path;
-use std::time;
-use cton_reader::TestCommand;
-use CommandResult;
-use cat;
-use print_cfg;
-use filetest::runner::TestRunner;
-
-pub mod subtest;
-
-mod binemit;
-mod compile;
-mod concurrent;
-mod domtree;
-mod legalizer;
-mod licm;
-mod preopt;
-mod regalloc;
-mod runner;
-mod runone;
-mod simple_gvn;
-mod verifier;
-
-/// The result of running the test in a file.
-pub type TestResult = Result<time::Duration, String>;
-
-/// Main entry point for `cton-util test`.
-///
-/// Take a list of filenames which can be either `.cton` files or directories.
-///
-/// Files are interpreted as test cases and executed immediately.
-///
-/// Directories are scanned recursively for test cases ending in `.cton`. These test cases are
-/// executed on background threads.
-///
-pub fn run(verbose: bool, files: Vec<String>) -> CommandResult {
-    let mut runner = TestRunner::new(verbose);
-
-    for path in files.iter().map(Path::new) {
-        if path.is_file() {
-            runner.push_test(path);
-        } else {
-            runner.push_dir(path);
-        }
-    }
-
-    runner.start_threads();
-    runner.run()
-}
-
-/// Create a new subcommand trait object to match `parsed.command`.
-///
-/// This function knows how to create all of the possible `test <foo>` commands that can appear in
-/// a `.cton` test file.
-fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
-    match parsed.command {
-        "binemit" => binemit::subtest(parsed),
-        "cat" => cat::subtest(parsed),
-        "compile" => compile::subtest(parsed),
-        "domtree" => domtree::subtest(parsed),
-        "legalizer" => legalizer::subtest(parsed),
-        "licm" => licm::subtest(parsed),
-        "preopt" => preopt::subtest(parsed),
-        "print-cfg" => print_cfg::subtest(parsed),
-        "regalloc" => regalloc::subtest(parsed),
-        "simple-gvn" => simple_gvn::subtest(parsed),
-        "verifier" => verifier::subtest(parsed),
-        _ => Err(format!("unknown test command '{}'", parsed.command)),
-    }
-}
--- a/cranelift/src/filetest/preopt.rs
+++ b/cranelift/src/filetest/preopt.rs
@@ -1,50 +0,0 @@
-//! Test command for testing the preopt pass.
-//!
-//! The resulting function is sent to `filecheck`.
-
-use cretonne::ir::Function;
-use cretonne;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result, run_filecheck};
-use std::borrow::Cow;
-use std::fmt::Write;
-use utils::pretty_error;
-
-struct TestPreopt;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "preopt");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestPreopt))
-    }
-}
-
-impl SubTest for TestPreopt {
-    fn name(&self) -> Cow<str> {
-        Cow::from("preopt")
-    }
-
-    fn is_mutating(&self) -> bool {
-        true
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
-        // Create a compilation context, and drop in the function.
-        let mut comp_ctx = cretonne::Context::new();
-        comp_ctx.func = func.into_owned();
-        let isa = context.isa.expect("preopt needs an ISA");
-
-        comp_ctx.flowgraph();
-        comp_ctx.preopt(isa).map_err(|e| {
-            pretty_error(&comp_ctx.func, context.isa, Into::into(e))
-        })?;
-
-        let mut text = String::new();
-        write!(&mut text, "{}", &comp_ctx.func).map_err(
-            |e| e.to_string(),
-        )?;
-        run_filecheck(&text, context)
-    }
-}
--- a/cranelift/src/filetest/regalloc.rs
+++ b/cranelift/src/filetest/regalloc.rs
@@ -1,62 +0,0 @@
-//! Test command for testing the register allocator.
-//!
-//! The `regalloc` test command runs each function through the register allocator after ensuring
-//! that all instructions are legal for the target.
-//!
-//! The resulting function is sent to `filecheck`.
-
-use cretonne::ir::Function;
-use cretonne;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result, run_filecheck};
-use std::borrow::Cow;
-use std::fmt::Write;
-use utils::pretty_error;
-
-struct TestRegalloc;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "regalloc");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestRegalloc))
-    }
-}
-
-impl SubTest for TestRegalloc {
-    fn name(&self) -> Cow<str> {
-        Cow::from("regalloc")
-    }
-
-    fn is_mutating(&self) -> bool {
-        true
-    }
-
-    fn needs_isa(&self) -> bool {
-        true
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
-        let isa = context.isa.expect("register allocator needs an ISA");
-
-        // Create a compilation context, and drop in the function.
-        let mut comp_ctx = cretonne::Context::new();
-        comp_ctx.func = func.into_owned();
-
-        comp_ctx.compute_cfg();
-        // TODO: Should we have an option to skip legalization?
-        comp_ctx.legalize(isa).map_err(|e| {
-            pretty_error(&comp_ctx.func, context.isa, e)
-        })?;
-        comp_ctx.compute_domtree();
-        comp_ctx.regalloc(isa).map_err(|e| {
-            pretty_error(&comp_ctx.func, context.isa, e)
-        })?;
-
-        let mut text = String::new();
-        write!(&mut text, "{}", &comp_ctx.func.display(Some(isa)))
-            .map_err(|e| e.to_string())?;
-        run_filecheck(&text, context)
-    }
-}
--- a/cranelift/src/filetest/runner.rs
+++ b/cranelift/src/filetest/runner.rs
@@ -1,338 +0,0 @@
-//! Test runner.
-//!
-//! This module implements the `TestRunner` struct which manages executing tests as well as
-//! scanning directories for tests.
-
-use std::error::Error;
-use std::fmt::{self, Display};
-use std::ffi::OsStr;
-use std::path::{Path, PathBuf};
-use filetest::{TestResult, runone};
-use filetest::concurrent::{ConcurrentRunner, Reply};
-use CommandResult;
-
-// Timeout in seconds when we're not making progress.
-const TIMEOUT_PANIC: usize = 10;
-
-// Timeout for reporting slow tests without panicking.
-const TIMEOUT_SLOW: usize = 3;
-
-struct QueueEntry {
-    path: PathBuf,
-    state: State,
-}
-
-#[derive(PartialEq, Eq, Debug)]
-enum State {
-    New,
-    Queued,
-    Running,
-    Done(TestResult),
-}
-
-impl QueueEntry {
-    pub fn path(&self) -> &Path {
-        self.path.as_path()
-    }
-}
-
-impl Display for QueueEntry {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let p = self.path.to_string_lossy();
-        match self.state {
-            State::Done(Ok(dur)) => {
-                write!(
-                    f,
-                    "{}.{:03} {}",
-                    dur.as_secs(),
-                    dur.subsec_nanos() / 1000000,
-                    p
-                )
-            }
-            State::Done(Err(ref e)) => write!(f, "FAIL {}: {}", p, e),
-            _ => write!(f, "{}", p),
-        }
-    }
-}
-
-pub struct TestRunner {
-    verbose: bool,
-
-    // Directories that have not yet been scanned.
-    dir_stack: Vec<PathBuf>,
-
-    // Filenames of tests to run.
-    tests: Vec<QueueEntry>,
-
-    // Pointer into `tests` where the `New` entries begin.
-    new_tests: usize,
-
-    // Number of contiguous reported tests at the front of `tests`.
-    reported_tests: usize,
-
-    // Number of errors seen so far.
-    errors: usize,
-
-    // Number of ticks received since we saw any progress.
-    ticks_since_progress: usize,
-
-    threads: Option<ConcurrentRunner>,
-}
-
-impl TestRunner {
-    /// Create a new blank TrstRunner.
-    pub fn new(verbose: bool) -> Self {
-        Self {
-            verbose,
-            dir_stack: Vec::new(),
-            tests: Vec::new(),
-            new_tests: 0,
-            reported_tests: 0,
-            errors: 0,
-            ticks_since_progress: 0,
-            threads: None,
-        }
-    }
-
-    /// Add a directory path to be scanned later.
-    ///
-    /// If `dir` turns out to be a regular file, it is silently ignored.
-    /// Otherwise, any problems reading the directory are reported.
-    pub fn push_dir<P: Into<PathBuf>>(&mut self, dir: P) {
-        self.dir_stack.push(dir.into());
-    }
-
-    /// Add a test to be executed later.
-    ///
-    /// Any problems reading `file` as a test case file will be reported as a test failure.
-    pub fn push_test<P: Into<PathBuf>>(&mut self, file: P) {
-        self.tests.push(QueueEntry {
-            path: file.into(),
-            state: State::New,
-        });
-    }
-
-    /// Begin running tests concurrently.
-    pub fn start_threads(&mut self) {
-        assert!(self.threads.is_none());
-        self.threads = Some(ConcurrentRunner::new());
-    }
-
-    /// Scan any directories pushed so far.
-    /// Push any potential test cases found.
-    pub fn scan_dirs(&mut self) {
-        // This recursive search tries to minimize statting in a directory hierarchy containing
-        // mostly test cases.
-        //
-        // - Directory entries with a "cton" extension are presumed to be test case files.
-        // - Directory entries with no extension are presumed to be subdirectories.
-        // - Anything else is ignored.
-        //
-        while let Some(dir) = self.dir_stack.pop() {
-            match dir.read_dir() {
-                Err(err) => {
-                    // Fail silently if `dir` was actually a regular file.
-                    // This lets us skip spurious extensionless files without statting everything
-                    // needlessly.
-                    if !dir.is_file() {
-                        self.path_error(dir, err);
-                    }
-                }
-                Ok(entries) => {
-                    // Read all directory entries. Avoid statting.
-                    for entry_result in entries {
-                        match entry_result {
-                            Err(err) => {
-                                // Not sure why this would happen. `read_dir` succeeds, but there's
-                                // a problem with an entry. I/O error during a getdirentries
-                                // syscall seems to be the reason. The implementation in
-                                // libstd/sys/unix/fs.rs seems to suggest that breaking now would
-                                // be a good idea, or the iterator could keep returning the same
-                                // error forever.
-                                self.path_error(dir, err);
-                                break;
-                            }
-                            Ok(entry) => {
-                                let path = entry.path();
-                                // Recognize directories and tests by extension.
-                                // Yes, this means we ignore directories with '.' in their name.
-                                match path.extension().and_then(OsStr::to_str) {
-                                    Some("cton") => self.push_test(path),
-                                    Some(_) => {}
-                                    None => self.push_dir(path),
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-            // Get the new jobs running before moving on to the next directory.
-            self.schedule_jobs();
-        }
-    }
-
-    /// Report an error related to a path.
-    fn path_error<E: Error>(&mut self, path: PathBuf, err: E) {
-        self.errors += 1;
-        println!("{}: {}", path.to_string_lossy(), err);
-    }
-
-    /// Report on the next in-order job, if it's done.
-    fn report_job(&self) -> bool {
-        let jobid = self.reported_tests;
-        if let Some(&QueueEntry { state: State::Done(ref result), .. }) = self.tests.get(jobid) {
-            if self.verbose || result.is_err() {
-                println!("{}", self.tests[jobid]);
-            }
-            true
-        } else {
-            false
-        }
-    }
-
-    /// Schedule any new jobs to run.
-    fn schedule_jobs(&mut self) {
-        for jobid in self.new_tests..self.tests.len() {
-            assert_eq!(self.tests[jobid].state, State::New);
-            if let Some(ref mut conc) = self.threads {
-                // Queue test for concurrent execution.
-                self.tests[jobid].state = State::Queued;
-                conc.put(jobid, self.tests[jobid].path());
-            } else {
-                // Run test synchronously.
-                self.tests[jobid].state = State::Running;
-                let result = runone::run(self.tests[jobid].path());
-                self.finish_job(jobid, result);
-            }
-            self.new_tests = jobid + 1;
-        }
-
-        // Check for any asynchronous replies without blocking.
-        while let Some(reply) = self.threads.as_mut().and_then(ConcurrentRunner::try_get) {
-            self.handle_reply(reply);
-        }
-    }
-
-    /// Report the end of a job.
-    fn finish_job(&mut self, jobid: usize, result: TestResult) {
-        assert_eq!(self.tests[jobid].state, State::Running);
-        if result.is_err() {
-            self.errors += 1;
-        }
-        self.tests[jobid].state = State::Done(result);
-
-        // Reports jobs in order.
-        while self.report_job() {
-            self.reported_tests += 1;
-        }
-    }
-
-    /// Handle a reply from the async threads.
-    fn handle_reply(&mut self, reply: Reply) {
-        match reply {
-            Reply::Starting { jobid, .. } => {
-                assert_eq!(self.tests[jobid].state, State::Queued);
-                self.tests[jobid].state = State::Running;
-            }
-            Reply::Done { jobid, result } => {
-                self.ticks_since_progress = 0;
-                self.finish_job(jobid, result)
-            }
-            Reply::Tick => {
-                self.ticks_since_progress += 1;
-                if self.ticks_since_progress == TIMEOUT_SLOW {
-                    println!(
-                        "STALLED for {} seconds with {}/{} tests finished",
-                        self.ticks_since_progress,
-                        self.reported_tests,
-                        self.tests.len()
-                    );
-                    for jobid in self.reported_tests..self.tests.len() {
-                        if self.tests[jobid].state == State::Running {
-                            println!("slow: {}", self.tests[jobid]);
-                        }
-                    }
-                }
-                if self.ticks_since_progress >= TIMEOUT_PANIC {
-                    panic!(
-                        "worker threads stalled for {} seconds.",
-                        self.ticks_since_progress
-                    );
-                }
-            }
-        }
-    }
-
-    /// Drain the async jobs and shut down the threads.
-    fn drain_threads(&mut self) {
-        if let Some(mut conc) = self.threads.take() {
-            conc.shutdown();
-            while self.reported_tests < self.tests.len() {
-                match conc.get() {
-                    Some(reply) => self.handle_reply(reply),
-                    None => break,
-                }
-            }
-            conc.join();
-        }
-    }
-
-    /// Print out a report of slow tests.
-    fn report_slow_tests(&self) {
-        // Collect runtimes of succeeded tests.
-        let mut times = self.tests
-            .iter()
-            .filter_map(|entry| match *entry {
-                QueueEntry { state: State::Done(Ok(dur)), .. } => Some(dur),
-                _ => None,
-            })
-            .collect::<Vec<_>>();
-
-        // Get me some real data, kid.
-        let len = times.len();
-        if len < 4 {
-            return;
-        }
-
-        // Compute quartiles.
-        times.sort();
-        let qlen = len / 4;
-        let q1 = times[qlen];
-        let q3 = times[len - 1 - qlen];
-        // Inter-quartile range.
-        let iqr = q3 - q1;
-
-
-        // Cut-off for what we consider a 'slow' test: 3 IQR from the 75% quartile.
-        //
-        // Q3 + 1.5 IQR are the data points that would be plotted as outliers outside a box plot,
-        // but we have a wider distribution of test times, so double it to 3 IQR.
-        let cut = q3 + iqr * 3;
-        if cut > *times.last().unwrap() {
-            return;
-        }
-
-        for t in self.tests.iter().filter(|entry| match **entry {
-            QueueEntry { state: State::Done(Ok(dur)), .. } => dur > cut,
-            _ => false,
-        })
-        {
-            println!("slow: {}", t)
-        }
-
-    }
-
-    /// Scan pushed directories for tests and run them.
-    pub fn run(&mut self) -> CommandResult {
-        self.scan_dirs();
-        self.schedule_jobs();
-        self.drain_threads();
-        self.report_slow_tests();
-        println!("{} tests", self.tests.len());
-        match self.errors {
-            0 => Ok(()),
-            1 => Err("1 failure".to_string()),
-            n => Err(format!("{} failures", n)),
-        }
-    }
-}
--- a/cranelift/src/filetest/runone.rs
+++ b/cranelift/src/filetest/runone.rs
@@ -1,134 +0,0 @@
-//! Run the tests in a single test file.
-
-use std::borrow::Cow;
-use std::path::Path;
-use std::time;
-use cretonne::ir::Function;
-use cretonne::isa::TargetIsa;
-use cretonne::settings::Flags;
-use cretonne::timing;
-use cretonne::verify_function;
-use cton_reader::parse_test;
-use cton_reader::IsaSpec;
-use utils::{read_to_string, pretty_verifier_error};
-use filetest::{TestResult, new_subtest};
-use filetest::subtest::{SubTest, Context, Result};
-
-/// Load `path` and run the test in it.
-///
-/// If running this test causes a panic, it will propagate as normal.
-pub fn run(path: &Path) -> TestResult {
-    let _tt = timing::process_file();
-    dbg!("---\nFile: {}", path.to_string_lossy());
-    let started = time::Instant::now();
-    let buffer = read_to_string(path).map_err(|e| e.to_string())?;
-    let testfile = parse_test(&buffer).map_err(|e| e.to_string())?;
-    if testfile.functions.is_empty() {
-        return Err("no functions found".to_string());
-    }
-
-    // Parse the test commands.
-    let mut tests = testfile
-        .commands
-        .iter()
-        .map(new_subtest)
-        .collect::<Result<Vec<_>>>()?;
-
-    // Flags to use for those tests that don't need an ISA.
-    // This is the cumulative effect of all the `set` commands in the file.
-    let flags = match testfile.isa_spec {
-        IsaSpec::None(ref f) => f,
-        IsaSpec::Some(ref v) => v.last().expect("Empty ISA list").flags(),
-    };
-
-    // Sort the tests so the mutators are at the end, and those that don't need the verifier are at
-    // the front.
-    tests.sort_by_key(|st| (st.is_mutating(), st.needs_verifier()));
-
-    // Expand the tests into (test, flags, isa) tuples.
-    let mut tuples = test_tuples(&tests, &testfile.isa_spec, flags)?;
-
-    // Isolate the last test in the hope that this is the only mutating test.
-    // If so, we can completely avoid cloning functions.
-    let last_tuple = match tuples.pop() {
-        None => return Err("no test commands found".to_string()),
-        Some(t) => t,
-    };
-
-    for (func, details) in testfile.functions {
-        let mut context = Context {
-            preamble_comments: &testfile.preamble_comments,
-            details,
-            verified: false,
-            flags,
-            isa: None,
-        };
-
-        for tuple in &tuples {
-            run_one_test(*tuple, Cow::Borrowed(&func), &mut context)?;
-        }
-        // Run the last test with an owned function which means it won't need to clone it before
-        // mutating.
-        run_one_test(last_tuple, Cow::Owned(func), &mut context)?;
-    }
-
-
-    Ok(started.elapsed())
-}
-
-// Given a slice of tests, generate a vector of (test, flags, isa) tuples.
-fn test_tuples<'a>(
-    tests: &'a [Box<SubTest>],
-    isa_spec: &'a IsaSpec,
-    no_isa_flags: &'a Flags,
-) -> Result<Vec<(&'a SubTest, &'a Flags, Option<&'a TargetIsa>)>> {
-    let mut out = Vec::new();
-    for test in tests {
-        if test.needs_isa() {
-            match *isa_spec {
-                IsaSpec::None(_) => {
-                    // TODO: Generate a list of default ISAs.
-                    return Err(format!("test {} requires an ISA", test.name()));
-                }
-                IsaSpec::Some(ref isas) => {
-                    for isa in isas {
-                        out.push((&**test, isa.flags(), Some(&**isa)));
-                    }
-                }
-            }
-        } else {
-            // This test doesn't require an ISA, and we only want to run one instance of it.
-            // Still, give it an ISA ref if we happen to have a unique one.
-            // For example, `test cat` can use this to print encodings and register names.
-            out.push((&**test, no_isa_flags, isa_spec.unique_isa()));
-        }
-    }
-    Ok(out)
-}
-
-fn run_one_test<'a>(
-    tuple: (&'a SubTest, &'a Flags, Option<&'a TargetIsa>),
-    func: Cow<Function>,
-    context: &mut Context<'a>,
-) -> Result<()> {
-    let (test, flags, isa) = tuple;
-    let name = format!("{}({})", test.name(), func.name);
-    dbg!("Test: {} {}", name, isa.map(TargetIsa::name).unwrap_or("-"));
-
-    context.flags = flags;
-    context.isa = isa;
-
-    // Should we run the verifier before this test?
-    if !context.verified && test.needs_verifier() {
-        verify_function(&func, context.flags_or_isa()).map_err(
-            |e| {
-                pretty_verifier_error(&func, isa, e)
-            },
-        )?;
-        context.verified = true;
-    }
-
-    test.run(func, context).map_err(
-        |e| format!("{}: {}", name, e),
-    )
-}
--- a/cranelift/src/filetest/simple_gvn.rs
+++ b/cranelift/src/filetest/simple_gvn.rs
@@ -1,52 +0,0 @@
-//! Test command for testing the simple GVN pass.
-//!
-//! The `simple-gvn` test command runs each function through the simple GVN pass after ensuring
-//! that all instructions are legal for the target.
-//!
-//! The resulting function is sent to `filecheck`.
-
-use cretonne::ir::Function;
-use cretonne;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result, run_filecheck};
-use std::borrow::Cow;
-use std::fmt::Write;
-use utils::pretty_error;
-
-struct TestSimpleGVN;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "simple-gvn");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestSimpleGVN))
-    }
-}
-
-impl SubTest for TestSimpleGVN {
-    fn name(&self) -> Cow<str> {
-        Cow::from("simple-gvn")
-    }
-
-    fn is_mutating(&self) -> bool {
-        true
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
-        // Create a compilation context, and drop in the function.
-        let mut comp_ctx = cretonne::Context::new();
-        comp_ctx.func = func.into_owned();
-
-        comp_ctx.flowgraph();
-        comp_ctx.simple_gvn(context.flags_or_isa()).map_err(|e| {
-            pretty_error(&comp_ctx.func, context.isa, Into::into(e))
-        })?;
-
-        let mut text = String::new();
-        write!(&mut text, "{}", &comp_ctx.func).map_err(
-            |e| e.to_string(),
-        )?;
-        run_filecheck(&text, context)
-    }
-}
--- a/cranelift/src/filetest/subtest.rs
+++ b/cranelift/src/filetest/subtest.rs
@@ -1,102 +0,0 @@
-//! SubTest trait.
-
-use std::result;
-use std::borrow::Cow;
-use cretonne::ir::Function;
-use cretonne::isa::TargetIsa;
-use cretonne::settings::{Flags, FlagsOrIsa};
-use cton_reader::{Details, Comment};
-use filecheck::{CheckerBuilder, Checker, NO_VARIABLES};
-
-pub type Result<T> = result::Result<T, String>;
-
-/// Context for running a test on a single function.
-pub struct Context<'a> {
-    /// Comments from the preamble f the test file. These apply to all functions.
-    pub preamble_comments: &'a [Comment<'a>],
-
-    /// Additional details about the function from the parser.
-    pub details: Details<'a>,
-
-    /// Was the function verified before running this test?
-    pub verified: bool,
-
-    /// ISA-independent flags for this test.
-    pub flags: &'a Flags,
-
-    /// Target ISA to test against. Only guaranteed to be present for sub-tests whose `needs_isa`
-    /// method returned `true`. For other sub-tests, this is set if the test file has a unique ISA.
-    pub isa: Option<&'a TargetIsa>,
-}
-
-impl<'a> Context<'a> {
-    /// Get a `FlagsOrIsa` object for passing to the verifier.
-    pub fn flags_or_isa(&self) -> FlagsOrIsa<'a> {
-        FlagsOrIsa {
-            flags: self.flags,
-            isa: self.isa,
-        }
-    }
-}
-
-/// Common interface for implementations of test commands.
-///
-/// Each `.cton` test file may contain multiple test commands, each represented by a `SubTest`
-/// trait object.
-pub trait SubTest {
-    /// Name identifying this subtest. Typically the same as the test command.
-    fn name(&self) -> Cow<str>;
-
-    /// Should the verifier be run on the function before running the test?
-    fn needs_verifier(&self) -> bool {
-        true
-    }
-
-    /// Does this test mutate the function when it runs?
-    /// This is used as a hint to avoid cloning the function needlessly.
-    fn is_mutating(&self) -> bool {
-        false
-    }
-
-    /// Does this test need a `TargetIsa` trait object?
-    fn needs_isa(&self) -> bool {
-        false
-    }
-
-    /// Run this test on `func`.
-    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()>;
-}
-
-/// Run filecheck on `text`, using directives extracted from `context`.
-pub fn run_filecheck(text: &str, context: &Context) -> Result<()> {
-    let checker = build_filechecker(context)?;
-    if checker.check(text, NO_VARIABLES).map_err(|e| {
-        format!("filecheck: {}", e)
-    })?
-    {
-        Ok(())
-    } else {
-        // Filecheck mismatch. Emit an explanation as output.
-        let (_, explain) = checker.explain(text, NO_VARIABLES).map_err(|e| {
-            format!("explain: {}", e)
-        })?;
-        Err(format!("filecheck failed:\n{}{}", checker, explain))
-    }
-}
-
-/// Build a filechecker using the directives in the file preamble and the function's comments.
-pub fn build_filechecker(context: &Context) -> Result<Checker> {
-    let mut builder = CheckerBuilder::new();
-    // Preamble comments apply to all functions.
-    for comment in context.preamble_comments {
-        builder.directive(comment.text).map_err(|e| {
-            format!("filecheck: {}", e)
-        })?;
-    }
-    for comment in &context.details.comments {
-        builder.directive(comment.text).map_err(|e| {
-            format!("filecheck: {}", e)
-        })?;
-    }
-    Ok(builder.finish())
-}
--- a/cranelift/src/filetest/verifier.rs
+++ b/cranelift/src/filetest/verifier.rs
@@ -1,80 +0,0 @@
-//! Test command for checking the IL verifier.
-//!
-//! The `test verifier` test command looks for annotations on instructions like this:
-//!
-//!     jump ebb3 ; error: jump to non-existent EBB
-//!
-//! This annotation means that the verifier is expected to given an error for the jump instruction
-//! containing the substring "jump to non-existent EBB".
-
-use std::borrow::{Borrow, Cow};
-use cretonne::verify_function;
-use cretonne::ir::Function;
-use cton_reader::TestCommand;
-use filetest::subtest::{SubTest, Context, Result};
-use utils::match_directive;
-
-struct TestVerifier;
-
-pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
-    assert_eq!(parsed.command, "verifier");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestVerifier))
-    }
-}
-
-impl SubTest for TestVerifier {
-    fn name(&self) -> Cow<str> {
-        Cow::from("verifier")
-    }
-
-    fn needs_verifier(&self) -> bool {
-        // Running the verifier before this test would defeat its purpose.
-        false
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
-        let func = func.borrow();
-
-        // Scan source annotations for "error:" directives.
-        let mut expected = None;
-        for comment in &context.details.comments {
-            if let Some(tail) = match_directive(comment.text, "error:") {
-                // Currently, the verifier can only report one problem at a time.
-                // Reject more than one `error:` directives.
-                if expected.is_some() {
-                    return Err("cannot handle multiple error: directives".to_string());
-                }
-                expected = Some((comment.entity, tail));
-            }
-        }
-
-        match verify_function(func, context.flags_or_isa()) {
-            Ok(_) => {
-                match expected {
-                    None => Ok(()),
-                    Some((_, msg)) => Err(format!("passed, expected error: {}", msg)),
-                }
-            }
-            Err(got) => {
-                match expected {
-                    None => Err(format!("verifier pass, got {}", got)),
-                    Some((want_loc, want_msg)) if got.message.contains(want_msg) => {
-                        if want_loc == got.location {
-                            Ok(())
-                        } else {
-                            Err(format!(
-                                "correct error reported on {}, but wanted {}",
-                                got.location,
-                                want_loc
-                            ))
-                        }
-                    }
-                    Some(_) => Err(format!("mismatching error: {}", got)),
-                }
-            }
-        }
-    }
-}
--- a/cranelift/src/print_cfg.rs
+++ b/cranelift/src/print_cfg.rs
@@ -1,20 +1,14 @@
 //! The `print-cfg` sub-command.
 //!
-//! Read a series of Cretonne IL files and print their control flow graphs
+//! Read a series of Cretonne IR files and print their control flow graphs
 //! in graphviz format.

-use std::borrow::Cow;
-use std::fmt::{Result, Write, Display, Formatter};
-
 use CommandResult;
-use cretonne::flowgraph::ControlFlowGraph;
-use cretonne::ir::Function;
-use cretonne::ir::instructions::BranchInfo;
-use cton_reader::{parse_functions, TestCommand};
-use filetest::subtest::{self, SubTest, Context, Result as STResult};
+use cretonne::cfg_printer::CFGPrinter;
+use cton_reader::parse_functions;
 use utils::read_to_string;

-pub fn run(files: Vec<String>) -> CommandResult {
+pub fn run(files: &[String]) -> CommandResult {
    for (i, f) in files.into_iter().enumerate() {
        if i != 0 {
            println!();
@@ -24,74 +18,8 @@ pub fn run(files: Vec<String>) -> CommandResult {
    Ok(())
 }

-struct CFGPrinter<'a> {
-    func: &'a Function,
-    cfg: ControlFlowGraph,
-}
-
-impl<'a> CFGPrinter<'a> {
-    pub fn new(func: &'a Function) -> CFGPrinter<'a> {
-        CFGPrinter {
-            func,
-            cfg: ControlFlowGraph::with_function(func),
-        }
-    }
-
-    /// Write the CFG for this function to `w`.
-    pub fn write(&self, w: &mut Write) -> Result {
-        self.header(w)?;
-        self.ebb_nodes(w)?;
-        self.cfg_connections(w)?;
-        writeln!(w, "}}")
-    }
-
-    fn header(&self, w: &mut Write) -> Result {
-        writeln!(w, "digraph \"{}\" {{", self.func.name)?;
-        if let Some(entry) = self.func.layout.entry_block() {
-            writeln!(w, "    {{rank=min; {}}}", entry)?;
-        }
-        Ok(())
-    }
-
-    fn ebb_nodes(&self, w: &mut Write) -> Result {
-        for ebb in &self.func.layout {
-            write!(w, "    {} [shape=record, label=\"{{{}", ebb, ebb)?;
-            // Add all outgoing branch instructions to the label.
-            for inst in self.func.layout.ebb_insts(ebb) {
-                let idata = &self.func.dfg[inst];
-                match idata.analyze_branch(&self.func.dfg.value_lists) {
-                    BranchInfo::SingleDest(dest, _) => {
-                        write!(w, " | <{}>{} {}", inst, idata.opcode(), dest)?
-                    }
-                    BranchInfo::Table(table) => {
-                        write!(w, " | <{}>{} {}", inst, idata.opcode(), table)?
-                    }
-                    BranchInfo::NotABranch => {}
-                }
-            }
-            writeln!(w, "}}\"]")?
-        }
-        Ok(())
-    }
-
-    fn cfg_connections(&self, w: &mut Write) -> Result {
-        for ebb in &self.func.layout {
-            for (parent, inst) in self.cfg.pred_iter(ebb) {
-                writeln!(w, "    {}:{} -> {}", parent, inst, ebb)?;
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<'a> Display for CFGPrinter<'a> {
-    fn fmt(&self, f: &mut Formatter) -> Result {
-        self.write(f)
-    }
-}
-
-fn print_cfg(filename: String) -> CommandResult {
-    let buffer = read_to_string(&filename).map_err(
+fn print_cfg(filename: &str) -> CommandResult {
+    let buffer = read_to_string(filename).map_err(
        |e| format!("{}: {}", filename, e),
    )?;
    let items = parse_functions(&buffer).map_err(
@@ -107,29 +35,3 @@ fn print_cfg(filename: String) -> CommandResult {

    Ok(())
 }
-
-/// Object implementing the `test print-cfg` sub-test.
-struct TestPrintCfg;
-
-pub fn subtest(parsed: &TestCommand) -> STResult<Box<SubTest>> {
-    assert_eq!(parsed.command, "print-cfg");
-    if !parsed.options.is_empty() {
-        Err(format!("No options allowed on {}", parsed))
-    } else {
-        Ok(Box::new(TestPrintCfg))
-    }
-}
-
-impl SubTest for TestPrintCfg {
-    fn name(&self) -> Cow<str> {
-        Cow::from("print-cfg")
-    }
-
-    fn needs_verifier(&self) -> bool {
-        false
-    }
-
-    fn run(&self, func: Cow<Function>, context: &Context) -> STResult<()> {
-        subtest::run_filecheck(&CFGPrinter::new(&func).to_string(), context)
-    }
-}
--- a/cranelift/src/rsfilecheck.rs
+++ b/cranelift/src/rsfilecheck.rs
@@ -1,9 +1,13 @@
-use CommandResult;
-use utils::read_to_string;
-use filecheck::{CheckerBuilder, Checker, NO_VARIABLES};
-use std::io::{self, Read};
+//! The `filecheck` sub-command.
+//!
+//! This file is named to avoid a name collision with the filecheck crate.

-pub fn run(files: Vec<String>, verbose: bool) -> CommandResult {
+use CommandResult;
+use filecheck::{Checker, CheckerBuilder, NO_VARIABLES};
+use std::io::{self, Read};
+use utils::read_to_string;
+
+pub fn run(files: &[String], verbose: bool) -> CommandResult {
    if files.is_empty() {
        return Err("No check files".to_string());
    }
--- a/cranelift/src/utils.rs
+++ b/cranelift/src/utils.rs
@@ -1,13 +1,9 @@
 //! Utility functions.

-use cretonne::ir::entities::AnyEntity;
-use cretonne::{ir, verifier};
-use cretonne::result::CtonError;
+use cretonne::isa;
 use cretonne::isa::TargetIsa;
 use cretonne::settings::{self, FlagsOrIsa};
-use cretonne::isa;
 use cton_reader::{parse_options, Location};
-use std::fmt::Write;
 use std::fs::File;
 use std::io::{self, Read};
 use std::path::Path;
@@ -28,51 +24,6 @@ pub fn read_to_end<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
    Ok(buffer)
 }

-/// Look for a directive in a comment string.
-/// The directive is of the form "foo:" and should follow the leading `;` in the comment:
-///
-/// ; dominates: ebb3 ebb4
-///
-/// Return the comment text following the directive.
-pub fn match_directive<'a>(comment: &'a str, directive: &str) -> Option<&'a str> {
-    assert!(
-        directive.ends_with(':'),
-        "Directive must include trailing colon"
-    );
-    let text = comment.trim_left_matches(';').trim_left();
-    if text.starts_with(directive) {
-        Some(text[directive.len()..].trim())
-    } else {
-        None
-    }
-}
-
-/// Pretty-print a verifier error.
-pub fn pretty_verifier_error(
-    func: &ir::Function,
-    isa: Option<&TargetIsa>,
-    err: verifier::Error,
-) -> String {
-    let mut msg = err.to_string();
-    match err.location {
-        AnyEntity::Inst(inst) => {
-            write!(msg, "\n{}: {}\n\n", inst, func.dfg.display_inst(inst, isa)).unwrap()
-        }
-        _ => msg.push('\n'),
-    }
-    write!(msg, "{}", func.display(isa)).unwrap();
-    msg
-}
-
-/// Pretty-print a Cretonne error.
-pub fn pretty_error(func: &ir::Function, isa: Option<&TargetIsa>, err: CtonError) -> String {
-    if let CtonError::Verifier(e) = err {
-        pretty_verifier_error(func, isa, e)
-    } else {
-        err.to_string()
-    }
-}
-
 /// Like `FlagsOrIsa`, but holds ownership.
 pub enum OwnedFlagsOrIsa {
    Flags(settings::Flags),
@@ -90,10 +41,7 @@ impl OwnedFlagsOrIsa {
 }

 /// Parse "set" and "isa" commands.
-pub fn parse_sets_and_isa(
-    flag_set: Vec<String>,
-    flag_isa: String,
-) -> Result<OwnedFlagsOrIsa, String> {
+pub fn parse_sets_and_isa(flag_set: &[String], flag_isa: &str) -> Result<OwnedFlagsOrIsa, String> {
    let mut flag_builder = settings::builder();
    parse_options(
        flag_set.iter().map(|x| x.as_str()),
@@ -119,12 +67,3 @@ pub fn parse_sets_and_isa(
        Ok(OwnedFlagsOrIsa::Flags(settings::Flags::new(&flag_builder)))
    }
 }
-
-#[test]
-fn test_match_directive() {
-    assert_eq!(match_directive("; foo: bar  ", "foo:"), Some("bar"));
-    assert_eq!(match_directive(" foo:bar", "foo:"), Some("bar"));
-    assert_eq!(match_directive("foo:bar", "foo:"), Some("bar"));
-    assert_eq!(match_directive(";x foo: bar", "foo:"), None);
-    assert_eq!(match_directive(";;; foo: bar", "foo:"), Some("bar"));
-}
--- a/cranelift/src/wasm.rs
+++ b/cranelift/src/wasm.rs
@@ -1,19 +1,21 @@
 //! CLI tool to use the functions provided by the [cretonne-wasm](../cton_wasm/index.html) crate.
 //!
-//! Reads Wasm binary files, translates the functions' code to Cretonne IL.
+//! Reads Wasm binary files, translates the functions' code to Cretonne IR.
+#![cfg_attr(feature = "cargo-clippy", allow(too_many_arguments, cyclomatic_complexity))]

-use cton_wasm::{translate_module, DummyEnvironment, ModuleEnvironment};
-use std::path::PathBuf;
 use cretonne::Context;
+use cretonne::print_errors::{pretty_error, pretty_verifier_error};
 use cretonne::settings::FlagsOrIsa;
-use std::fs::File;
+use cton_wasm::{translate_module, DummyEnvironment, ModuleEnvironment};
 use std::error::Error;
+use std::fs::File;
 use std::io;
 use std::path::Path;
+use std::path::PathBuf;
 use std::process::Command;
 use tempdir::TempDir;
 use term;
-use utils::{pretty_verifier_error, pretty_error, parse_sets_and_isa, read_to_end};
+use utils::{parse_sets_and_isa, read_to_end};

 macro_rules! vprintln {
    ($x: expr, $($tts:tt)*) => {
@@ -37,8 +39,8 @@ pub fn run(
    flag_just_decode: bool,
    flag_check_translation: bool,
    flag_print: bool,
-    flag_set: Vec<String>,
-    flag_isa: String,
+    flag_set: &[String],
+    flag_isa: &str,
    flag_print_size: bool,
 ) -> Result<(), String> {
    let parsed = parse_sets_and_isa(flag_set, flag_isa)?;
@@ -52,8 +54,8 @@ pub fn run(
            flag_check_translation,
            flag_print,
            flag_print_size,
-            path.to_path_buf(),
-            name,
+            &path.to_path_buf(),
+            &name,
            parsed.as_fisa(),
        )?;
    }
@@ -66,8 +68,8 @@ fn handle_module(
    flag_check_translation: bool,
    flag_print: bool,
    flag_print_size: bool,
-    path: PathBuf,
-    name: String,
+    path: &PathBuf,
+    name: &str,
    fisa: FlagsOrIsa,
 ) -> Result<(), String> {
    let mut terminal = term::stdout().unwrap();
@@ -152,29 +154,27 @@ fn handle_module(
        context.func = func.clone();
        if flag_check_translation {
            context.verify(fisa).map_err(|err| {
-                pretty_verifier_error(&context.func, fisa.isa, err)
+                pretty_verifier_error(&context.func, fisa.isa, &err)
            })?;
-        } else {
-            if let Some(isa) = fisa.isa {
-                let compiled_size = context.compile(isa).map_err(|err| {
-                    pretty_error(&context.func, fisa.isa, err)
-                })?;
-                if flag_print_size {
-                    println!(
-                        "Function #{} code size: {} bytes",
-                        func_index,
-                        compiled_size
-                    );
-                    total_module_code_size += compiled_size;
-                    println!(
-                        "Function #{} bytecode size: {} bytes",
-                        func_index,
-                        dummy_environ.func_bytecode_sizes[func_index]
-                    );
-                }
-            } else {
-                return Err(String::from("compilation requires a target isa"));
+        } else if let Some(isa) = fisa.isa {
+            let compiled_size = context.compile(isa).map_err(|err| {
+                pretty_error(&context.func, fisa.isa, err)
+            })?;
+            if flag_print_size {
+                println!(
+                    "Function #{} code size: {} bytes",
+                    func_index,
+                    compiled_size
+                );
+                total_module_code_size += compiled_size;
+                println!(
+                    "Function #{} bytecode size: {} bytes",
+                    func_index,
+                    dummy_environ.func_bytecode_sizes[def_index]
+                );
            }
+        } else {
+            return Err(String::from("compilation requires a target isa"));
        }
        if flag_print {
            vprintln!(flag_verbose, "");
@@ -193,10 +193,7 @@ fn handle_module(

    if !flag_check_translation && flag_print_size {
        println!("Total module code size: {} bytes", total_module_code_size);
-        let total_bytecode_size = dummy_environ.func_bytecode_sizes.iter().fold(
-            0,
-            |sum, x| sum + x,
-        );
+        let total_bytecode_size: usize = dummy_environ.func_bytecode_sizes.iter().sum();
        println!("Total module bytecode size: {} bytes", total_bytecode_size);
    }

--- a/cranelift/test-all.sh
+++ b/cranelift/test-all.sh
@@ -3,11 +3,10 @@ set -euo pipefail

 # This is the top-level test script:
 #
-# - Build documentation for Rust code in 'src/tools/target/doc'.
-# - Run unit tests for all Rust crates.
-# - Make a debug build of all crates.
-# - Make a release build of cton-util.
-# - Run file-level tests with the release build of cton-util.
+# - Make a debug build.
+# - Make a release build.
+# - Run unit tests for all Rust crates (including the filetests).
+# - Build API documentation.
 #
 # All tests run by this script should be passing at all times.

@@ -42,22 +41,26 @@ if [ -n "$needcheck" ]; then
    touch $tsfile || echo no target directory
 fi

-cd "$topdir"
-banner "Rust unit tests"
-cargo test --all
+# Make sure the code builds in debug mode.
+banner "Rust debug build"
+cargo build

-# Build cton-util for parser testing.
-cd "$topdir"
-banner "Rust documentation"
-echo "open $topdir/target/doc/cretonne/index.html"
+# Make sure the code builds in release mode, and run the unit tests. We run
+# these in release mode for speed, but note that the top-level Cargo.toml file
+# does enable debug assertions in release builds.
+banner "Rust release build and unit tests"
+cargo test --all --release
+
+# Make sure the documentation builds.
+banner "Rust documentation: $topdir/target/doc/cretonne/index.html"
 cargo doc
-banner "Rust release build"
-cargo build --release

-export CTONUTIL="$topdir/target/release/cton-util"
-
-cd "$topdir"
-banner "File tests"
-"$CTONUTIL" test filetests docs
+# Run clippy if we have it.
+banner "Rust linter"
+if $topdir/check-clippy.sh; then
+    $topdir/clippy-all.sh --write-mode=diff
+else
+    echo "\`cargo +nightly install clippy\` for optional rust linting"
+fi

 banner "OK"
--- a/cranelift/tests/filetests.rs
+++ b/cranelift/tests/filetests.rs
@@ -0,0 +1,7 @@
+extern crate cton_filetests;
+
+#[test]
+fn filetests() {
+    // Run all the filetests in the following directories.
+    cton_filetests::run(false, &["filetests".into(), "docs".into()]).expect("test harness");
+}
				`@@ -0,0 +1 @@`
				`doc-valid-idents = [ "WebAssembly", "NaN", "SetCC" ]`