diff --git a/README.rst b/README.rst index 0186bc57f8..51ee362d10 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ Cretonne Code Generator ======================= Cretonne is a low-level retargetable code generator. It translates a `target-independent -intermediate language `_ into executable +intermediate language `_ into executable machine code. *This is a work in progress that is not yet functional.* @@ -100,7 +100,7 @@ Building the documentation -------------------------- To build the Cretonne documentation, you need the `Sphinx documentation -generator `_:: +generator `_:: $ pip install sphinx sphinx-autobuild sphinx_rtd_theme $ cd cretonne/docs diff --git a/check-rustfmt.sh b/check-rustfmt.sh index 483a45396a..1983493342 100755 --- a/check-rustfmt.sh +++ b/check-rustfmt.sh @@ -1,5 +1,6 @@ #!/bin/bash -# +set -euo pipefail + # Usage: check-rustfmt.sh [--install] # # Check that the desired version of rustfmt is installed. diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index 91fe8ed6d9..297e8c31b3 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cretonne-tools" authors = ["The Cretonne Project Developers"] -version = "0.1.0" +version = "0.3.4" description = "Binaries for testing the Cretonne library" license = "Apache-2.0" documentation = "https://cretonne.readthedocs.io/" @@ -13,18 +13,18 @@ name = "cton-util" path = "src/cton-util.rs" [dependencies] -cretonne = { path = "lib/cretonne", version = "0.1.0" } -cretonne-reader = { path = "lib/reader", version = "0.1.0" } -cretonne-frontend = { path = "lib/frontend", version = "0.1.0" } -cretonne-wasm = { path = "lib/wasm", version = "0.1.0" } -cretonne-native = { path = "lib/native", version = "0.1.0" } +cretonne = { path = "lib/cretonne", version = "0.3.4" } +cretonne-reader = { path = "lib/reader", version = "0.3.4" } +cretonne-frontend = { path = "lib/frontend", version = "0.3.4" } +cretonne-wasm = { path = "lib/wasm", version = "0.3.4" } 
+cretonne-native = { path = "lib/native", version = "0.3.4" } filecheck = { path = "lib/filecheck" } docopt = "0.8.0" serde = "1.0.8" serde_derive = "1.0.8" num_cpus = "1.5.1" tempdir="0.3.5" -term = "0.4.6" +term = "0.5" [workspace] diff --git a/cranelift/docs/Makefile b/cranelift/docs/Makefile index 635aafde1c..335779f1f3 100644 --- a/cranelift/docs/Makefile +++ b/cranelift/docs/Makefile @@ -1,196 +1,24 @@ -# Makefile for Sphinx documentation +# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXABUILD = sphinx-autobuild -PAPER = +SPHINXPROJ = cretonne +SOURCEDIR = . BUILDDIR = _build -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . - -.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext - +# Put it first so that "make" without argument is like "make help". 
help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - -clean: - rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) autohtml: html $(SPHINXABUILD) -z ../lib/cretonne/meta --ignore '.*' -b html -E $(ALLSPHINXOPTS) $(BUILDDIR)/html -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
+.PHONY: help Makefile -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/cretonne.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/cretonne.qhc" - -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/cretonne" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/cretonne" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
- @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." 
- -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/cranelift/docs/compare-llvm.rst b/cranelift/docs/compare-llvm.rst index 03b82dc7f7..0180f6cc92 100644 --- a/cranelift/docs/compare-llvm.rst +++ b/cranelift/docs/compare-llvm.rst @@ -2,9 +2,9 @@ Cretonne compared to LLVM ************************* -`LLVM `_ is a collection of compiler components implemented as +`LLVM `_ is a collection of compiler components implemented as a set of C++ libraries. It can be used to build both JIT compilers and static -compilers like `Clang `_, and it is deservedly very +compilers like `Clang `_, and it is deservedly very popular. `Chris Lattner's chapter about LLVM `_ in the `Architecture of Open Source Applications `_ book gives an excellent @@ -40,7 +40,7 @@ Intermediate representations LLVM uses multiple intermediate representations as it translates a program to binary machine code: -`LLVM IR `_ +`LLVM IR `_ This is the primary intermediate language which has textual, binary, and in-memory representations. 
It serves two main purposes: @@ -49,7 +49,7 @@ binary machine code: - Intermediate representation for common mid-level optimizations. A large library of code analysis and transformation passes operate on LLVM IR. -`SelectionDAG `_ +`SelectionDAG `_ A graph-based representation of the code in a single basic block is used by the instruction selector. It has both ISA-agnostic and ISA-specific opcodes. These main passes are run on the SelectionDAG representation: @@ -65,7 +65,7 @@ binary machine code: The SelectionDAG representation automatically eliminates common subexpressions and dead code. -`MachineInstr `_ +`MachineInstr `_ A linear representation of ISA-specific instructions that initially is in SSA form, but it can also represent non-SSA form during and after register allocation. Many low-level optimizations run on MI code. The most important @@ -74,7 +74,7 @@ binary machine code: - Scheduling. - Register allocation. -`MC `_ +`MC `_ MC serves as the output abstraction layer and is the basis for LLVM's integrated assembler. It is used for: @@ -126,7 +126,7 @@ condition is false. The Cretonne representation is closer to how machine code works; LLVM's representation is more abstract. LLVM uses `phi instructions -`_ in its SSA +`_ in its SSA representation. Cretonne passes arguments to EBBs instead. The two representations are equivalent, but the EBB arguments are better suited to handle EBBs that may contain multiple branches to the same destination block diff --git a/cranelift/docs/conf.py b/cranelift/docs/conf.py index 0603a27bfd..20cfd32f91 100644 --- a/cranelift/docs/conf.py +++ b/cranelift/docs/conf.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # cretonne documentation build configuration file, created by -# sphinx-quickstart on Fri Jan 8 10:11:19 2016. +# sphinx-quickstart on Fri Mar 2 12:49:24 2018. # # This file is execfile()d with the current directory set to its # containing dir. 
@@ -12,14 +12,13 @@ # All configuration values have a default; values that are commented out # serve to show the default. -from __future__ import absolute_import -import sys -import os - - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. + +from __future__ import absolute_import +import os +import sys sys.path.insert(0, os.path.abspath('.')) # Also add the meta directory to sys.path so autodoc can find the Cretonne meta @@ -28,6 +27,10 @@ sys.path.insert(0, os.path.abspath('../lib/cretonne/meta')) # -- General configuration ------------------------------------------------ +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. @@ -47,6 +50,7 @@ templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: +# # source_suffix = ['.rst', '.md'] source_suffix = '.rst' @@ -55,7 +59,7 @@ master_doc = 'index' # General information about the project. project = u'cretonne' -copyright = u'2016, Cretonne Developers' +copyright = u'2018, Cretonne Developers' author = u'Cretonne Developers' # The version info for the project you're documenting, acts as replacement for @@ -76,7 +80,8 @@ language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = 'sphinx' @@ -89,22 +94,54 @@ todo_include_todos = True # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. +# html_theme = 'sphinx_rtd_theme' +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# +# html_static_path = ['_static'] + + +# -- Options for HTMLHelp output ------------------------------------------ + # Output file base name for HTML help builder. htmlhelp_basename = 'cretonnedoc' + # -- Options for LaTeX output --------------------------------------------- latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ - (master_doc, 'cretonne.tex', u'cretonne Documentation', - author, 'manual'), + (master_doc, 'cretonne.tex', u'cretonne Documentation', + author, 'manual'), ] @@ -124,9 +161,9 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'cretonne', u'cretonne Documentation', - author, 'cretonne', 'One line description of project.', - 'Miscellaneous'), + (master_doc, 'cretonne', u'cretonne Documentation', + author, 'cretonne', 'One line description of project.', + 'Miscellaneous'), ] diff --git a/cranelift/docs/example.cton b/cranelift/docs/example.cton index 1a3117c521..a36fef3238 100644 --- a/cranelift/docs/example.cton +++ b/cranelift/docs/example.cton @@ -1,7 +1,7 @@ test verifier function %average(i32, i32) -> f32 native { - ss1 = local 8 ; Stack slot for ``sum``. + ss1 = explicit_slot 8 ; Stack slot for ``sum``. ebb1(v1: i32, v2: i32): v3 = f64const 0x0.0 diff --git a/cranelift/docs/index.rst b/cranelift/docs/index.rst index 301346a9e3..c2e5b6d5ef 100644 --- a/cranelift/docs/index.rst +++ b/cranelift/docs/index.rst @@ -12,6 +12,28 @@ Contents: regalloc compare-llvm +Rust Crate Documentation +======================== + +`cretonne `_ + This is the core code generator crate. It takes Cretonne IR as input + and emits encoded machine instructions, along with symbolic relocations, + as output. + +`cretonne-wasm `_ + This crate translates WebAssembly code into Cretonne IR. + +`cretonne-frontend `_ + This crate provides utilities for translating code into Cretonne IR. + +`cretonne-native `_ + This crate performs auto-detection of the host, allowing Cretonne to + generate code optimized for the machine it's running on. + +`cretonne-reader `_ + This crate translates from Cretonne IR's text format into Cretonne IR + in in-memory data structures. 
+ Indices and tables ================== diff --git a/cranelift/docs/langref.rst b/cranelift/docs/langref.rst index d80554dc67..3a855c32b4 100644 --- a/cranelift/docs/langref.rst +++ b/cranelift/docs/langref.rst @@ -37,7 +37,7 @@ The first line of a function definition provides the function *name* and the :term:`function signature` which declares the parameter and return types. Then follows the :term:`function preamble` which declares a number of entities that can be referenced inside the function. In the example above, the preamble -declares a single local variable, ``ss1``. +declares a single explicit stack slot, ``ss1``. After the preamble follows the :term:`function body` which consists of :term:`extended basic block`\s (EBBs), the first of which is the @@ -440,7 +440,7 @@ Cretonne provides fully general :inst:`load` and :inst:`store` instructions for accessing memory, as well as :ref:`extending loads and truncating stores `. -If the memory at the given addresss is not :term:`addressable`, the behavior of +If the memory at the given address is not :term:`addressable`, the behavior of these instructions is undefined. If it is addressable but not :term:`accessible`, they :term:`trap`. @@ -471,8 +471,8 @@ the expected alignment. By default, misaligned loads and stores are allowed, but when the ``aligned`` flag is set, a misaligned memory access is allowed to :term:`trap`. -Local variables ---------------- +Explicit Stack Slots +-------------------- One set of restricted memory operations access the current function's stack frame. The stack frame is divided into fixed-size stack slots that are @@ -480,9 +480,9 @@ allocated in the :term:`function preamble`. Stack slots are not typed, they simply represent a contiguous sequence of :term:`accessible` bytes in the stack frame. -.. inst:: SS = local Bytes, Flags... +.. inst:: SS = explicit_slot Bytes, Flags... - Allocate a stack slot for a local variable in the preamble. + Allocate a stack slot in the preamble. 
If no alignment is specified, Cretonne will pick an appropriate alignment for the stack slot based on its size and access patterns. @@ -559,7 +559,7 @@ runtime data structures. The address of GV can be computed by first loading a pointer from BaseGV and adding Offset to it. - It is assumed the BaseGV resides in readable memory with the apropriate + It is assumed the BaseGV resides in readable memory with the appropriate alignment for storing a pointer. Chains of ``deref`` global variables are possible, but cycles are not @@ -782,7 +782,7 @@ Integer operations For example, see `llvm.sadd.with.overflow.*` and `llvm.ssub.with.overflow.*` in - `LLVM `_. + `LLVM `_. .. autoinst:: imul .. autoinst:: imul_imm @@ -1135,7 +1135,7 @@ Glossary A list of declarations of entities that are used by the function body. Some of the entities that can be declared in the preamble are: - - Local variables. + - Stack slots. - Functions that are called directly. - Function signatures for indirect function calls. - Function flags and attributes that are not part of the signature. @@ -1160,7 +1160,19 @@ Glossary stack slot A fixed size memory allocation in the current function's activation - frame. Also called a local variable. + frame. These include :term:`explicit stack slot`\s and + :term:`spill stack slot`\s. + + explicit stack slot + A fixed size memory allocation in the current function's activation + frame. These differ from :term:`spill stack slot`\s in that they can + be created by frontends and they may have their addresses taken. + + spill stack slot + A fixed size memory allocation in the current function's activation + frame. These differ from :term:`explicit stack slot`\s in that they are + only created during register allocation, and they may not have their + address taken. 
terminator instruction A control flow instruction that unconditionally directs the flow of diff --git a/cranelift/docs/make.bat b/cranelift/docs/make.bat index 3f6fe2e48d..2958a2ba6e 100644 --- a/cranelift/docs/make.bat +++ b/cranelift/docs/make.bat @@ -1,62 +1,19 @@ @ECHO OFF +pushd %~dp0 + REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) +set SOURCEDIR=. set BUILDDIR=_build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . -set I18NSPHINXOPTS=%SPHINXOPTS% . -if NOT "%PAPER%" == "" ( - set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% - set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% -) +set SPHINXPROJ=cretonne if "%1" == "" goto help -if "%1" == "help" ( - :help - echo.Please use `make ^` where ^ is one of - echo. html to make standalone HTML files - echo. dirhtml to make HTML files named index.html in directories - echo. singlehtml to make a single large HTML file - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. devhelp to make HTML files and a Devhelp project - echo. epub to make an epub - echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. text to make text files - echo. man to make manual pages - echo. texinfo to make Texinfo files - echo. gettext to make PO message catalogs - echo. changes to make an overview over all changed/added/deprecated items - echo. xml to make Docutils-native XML files - echo. pseudoxml to make pseudoxml-XML files for display purposes - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled - echo. 
coverage to run coverage check of the documentation if enabled - goto end -) - -if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* - goto end -) - - -REM Check if sphinx-build is available and fallback to Python version if any -%SPHINXBUILD% 1>NUL 2>NUL -if errorlevel 9009 goto sphinx_python -goto sphinx_ok - -:sphinx_python - -set SPHINXBUILD=python -m sphinx.__init__ -%SPHINXBUILD% 2> nul +%SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx @@ -65,199 +22,15 @@ if errorlevel 9009 ( echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ + echo.https://sphinx-doc.org/ exit /b 1 ) -:sphinx_ok +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end - -if "%1" == "html" ( - %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/html. - goto end -) - -if "%1" == "dirhtml" ( - %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. - goto end -) - -if "%1" == "singlehtml" ( - %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. - goto end -) - -if "%1" == "pickle" ( - %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the pickle files. - goto end -) - -if "%1" == "json" ( - %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the JSON files. - goto end -) - -if "%1" == "htmlhelp" ( - %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp - if errorlevel 1 exit /b 1 - echo. 
- echo.Build finished; now you can run HTML Help Workshop with the ^ -.hhp project file in %BUILDDIR%/htmlhelp. - goto end -) - -if "%1" == "qthelp" ( - %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run "qcollectiongenerator" with the ^ -.qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\cretonne.qhcp - echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\cretonne.ghc - goto end -) - -if "%1" == "devhelp" ( - %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. - goto end -) - -if "%1" == "epub" ( - %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub file is in %BUILDDIR%/epub. - goto end -) - -if "%1" == "latex" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdf" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdfja" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf-ja - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "text" ( - %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The text files are in %BUILDDIR%/text. - goto end -) - -if "%1" == "man" ( - %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The manual pages are in %BUILDDIR%/man. 
- goto end -) - -if "%1" == "texinfo" ( - %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. - goto end -) - -if "%1" == "gettext" ( - %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The message catalogs are in %BUILDDIR%/locale. - goto end -) - -if "%1" == "changes" ( - %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes - if errorlevel 1 exit /b 1 - echo. - echo.The overview file is in %BUILDDIR%/changes. - goto end -) - -if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck - if errorlevel 1 exit /b 1 - echo. - echo.Link check complete; look for any errors in the above output ^ -or in %BUILDDIR%/linkcheck/output.txt. - goto end -) - -if "%1" == "doctest" ( - %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest - if errorlevel 1 exit /b 1 - echo. - echo.Testing of doctests in the sources finished, look at the ^ -results in %BUILDDIR%/doctest/output.txt. - goto end -) - -if "%1" == "coverage" ( - %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage - if errorlevel 1 exit /b 1 - echo. - echo.Testing of coverage in the sources finished, look at the ^ -results in %BUILDDIR%/coverage/python.txt. - goto end -) - -if "%1" == "xml" ( - %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The XML files are in %BUILDDIR%/xml. - goto end -) - -if "%1" == "pseudoxml" ( - %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 
- goto end -) +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end +popd diff --git a/cranelift/docs/testing.rst b/cranelift/docs/testing.rst index 951149c044..2b84b1c19b 100644 --- a/cranelift/docs/testing.rst +++ b/cranelift/docs/testing.rst @@ -119,7 +119,7 @@ All types of tests allow shared Cretonne settings to be modified: option : flag | setting "=" value The shared settings available for all target ISAs are defined in -:file:`lib/cretonne/meta/cretonne/settings.py`. +:file:`lib/cretonne/meta/base/settings.py`. The ``set`` lines apply settings cumulatively:: diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index 7d3e3a683b..df98a86515 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -552,3 +552,35 @@ ebb1: return } + +; Tests for i32/i8 conversion instructions. +function %I32_I8() { +ebb0: + [-,%rcx] v1 = iconst.i32 1 + + [-,%rcx] v11 = ireduce.i8 v1 ; bin: + + ; asm: movsbl %cl, %esi + [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 + + ; asm: movzbl %cl, %esi + [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 + + trap user0 ; bin: 0f 0b +} + +; Tests for i32/i16 conversion instructions. 
+function %I32_I16() { +ebb0: + [-,%rcx] v1 = iconst.i32 1 + + [-,%rcx] v11 = ireduce.i16 v1 ; bin: + + ; asm: movswl %cx, %esi + [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 + + ; asm: movzwl %cx, %esi + [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 + + trap user0 ; bin: 0f 0b +} diff --git a/cranelift/filetests/isa/intel/binary64-float.cton b/cranelift/filetests/isa/intel/binary64-float.cton index a6f47e53db..69bfeb6c36 100644 --- a/cranelift/filetests/isa/intel/binary64-float.cton +++ b/cranelift/filetests/isa/intel/binary64-float.cton @@ -20,6 +20,7 @@ ebb0: [-,%rsi] v1 = iconst.i32 2 [-,%rax] v2 = iconst.i64 11 [-,%r14] v3 = iconst.i64 12 + [-,%r13] v4 = iconst.i64 13 ; asm: cvtsi2ssl %r11d, %xmm5 [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 41 0f 2a eb @@ -173,6 +174,10 @@ ebb0: [-] store.f32 v100, v3 ; bin: 66 41 0f 7e 2e ; asm: movd %xmm10, (%rax) [-] store.f32 v101, v2 ; bin: 66 44 0f 7e 10 + ; asm: movd %xmm5, (%r13) + [-] store.f32 v100, v4 ; bin: 66 41 0f 7e 6d 00 + ; asm: movd %xmm10, (%r13) + [-] store.f32 v101, v4 ; bin: 66 45 0f 7e 55 00 ; asm: movd %xmm5, 50(%r14) [-] store.f32 v100, v3+50 ; bin: 66 41 0f 7e 6e 32 ; asm: movd %xmm10, -50(%rax) @@ -250,6 +255,7 @@ ebb0: [-,%rsi] v1 = iconst.i32 2 [-,%rax] v2 = iconst.i64 11 [-,%r14] v3 = iconst.i64 12 + [-,%r13] v4 = iconst.i64 13 ; asm: cvtsi2sdl %r11d, %xmm5 [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 41 0f 2a eb @@ -403,6 +409,10 @@ ebb0: [-] store.f64 v100, v3 ; bin: 66 41 0f d6 2e ; asm: movq %xmm10, (%rax) [-] store.f64 v101, v2 ; bin: 66 44 0f d6 10 + ; asm: movq %xmm5, (%r13) + [-] store.f64 v100, v4 ; bin: 66 41 0f d6 6d 00 + ; asm: movq %xmm10, (%r13) + [-] store.f64 v101, v4 ; bin: 66 45 0f d6 55 00 ; asm: movq %xmm5, 50(%r14) [-] store.f64 v100, v3+50 ; bin: 66 41 0f d6 6e 32 ; asm: movq %xmm10, -50(%rax) diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index 0b546961d8..9eca950ee6 100644 --- 
a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -336,6 +336,28 @@ ebb0: ; asm: divq %r10 [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3 ; bin: 49 f7 f2 + ; double-length multiply instructions, 64 bit + [-,%rax] v1001 = iconst.i64 1 + [-,%r15] v1002 = iconst.i64 2 + ; asm: mulq %r15 + [-,%rax,%rdx] v1003, v1004 = x86_umulx v1001, v1002 ; bin: 49 f7 e7 + ; asm: imulq %r15 + [-,%rax,%rdx] v1005, v1006 = x86_smulx v1001, v1002 ; bin: 49 f7 ef + + ; double-length multiply instructions, 32 bit + [-,%rax] v1011 = iconst.i32 1 + [-,%r15] v1012 = iconst.i32 2 + [-,%rcx] v1017 = iconst.i32 3 + ; asm: mull %r15d + [-,%rax,%rdx] v1013, v1014 = x86_umulx v1011, v1012 ; bin: 41 f7 e7 + ; asm: imull %r15d + [-,%rax,%rdx] v1015, v1016 = x86_smulx v1011, v1012 ; bin: 41 f7 ef + + ; asm: mull %ecx + [-,%rax,%rdx] v1018, v1019 = x86_umulx v1011, v1017 ; bin: f7 e1 + ; asm: imull %ecx + [-,%rax,%rdx] v1020, v1021 = x86_smulx v1011, v1017 ; bin: f7 e9 + ; Bit-counting instructions. ; asm: popcntq %rsi, %rcx @@ -1062,6 +1084,118 @@ ebb2: jump ebb1 ; bin: eb fd } +; Tests for i32/i8 conversion instructions. +function %I32_I8() { +ebb0: + [-,%rcx] v1 = iconst.i32 1 + [-,%rsi] v2 = iconst.i32 2 + [-,%r10] v3 = iconst.i32 3 + + [-,%rcx] v11 = ireduce.i8 v1 ; bin: + [-,%rsi] v12 = ireduce.i8 v2 ; bin: + [-,%r10] v13 = ireduce.i8 v3 ; bin: + + ; asm: movsbl %cl, %esi + [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 + ; asm: movsbl %sil, %r10d + [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f be d6 + ; asm: movsbl %r10b, %ecx + [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f be ca + + ; asm: movzbl %cl, %esi + [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 + ; asm: movzbl %sil, %r10d + [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b6 d6 + ; asm: movzbl %r10b, %ecx + [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b6 ca + + trap user0 ; bin: 0f 0b +} + +; Tests for i32/i16 conversion instructions. 
+function %I32_I16() { +ebb0: + [-,%rcx] v1 = iconst.i32 1 + [-,%rsi] v2 = iconst.i32 2 + [-,%r10] v3 = iconst.i32 3 + + [-,%rcx] v11 = ireduce.i16 v1 ; bin: + [-,%rsi] v12 = ireduce.i16 v2 ; bin: + [-,%r10] v13 = ireduce.i16 v3 ; bin: + + ; asm: movswl %cx, %esi + [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 + ; asm: movswl %si, %r10d + [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f bf d6 + ; asm: movswl %r10w, %ecx + [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f bf ca + + ; asm: movzwl %cx, %esi + [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 + ; asm: movzwl %si, %r10d + [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b7 d6 + ; asm: movzwl %r10w, %ecx + [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b7 ca + + trap user0 ; bin: 0f 0b +} + +; Tests for i64/i8 conversion instructions. +function %I64_I8() { +ebb0: + [-,%rcx] v1 = iconst.i64 1 + [-,%rsi] v2 = iconst.i64 2 + [-,%r10] v3 = iconst.i64 3 + + [-,%rcx] v11 = ireduce.i8 v1 ; bin: + [-,%rsi] v12 = ireduce.i8 v2 ; bin: + [-,%r10] v13 = ireduce.i8 v3 ; bin: + + ; asm: movsbq %cl, %rsi + [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f be f1 + ; asm: movsbq %sil, %r10 + [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f be d6 + ; asm: movsbq %r10b, %rcx + [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f be ca + + ; asm: movzbl %cl, %esi + [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b6 f1 + ; asm: movzbl %sil, %r10d + [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b6 d6 + ; asm: movzbl %r10b, %ecx + [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b6 ca + + trap user0 ; bin: 0f 0b +} + +; Tests for i64/i16 conversion instructions. 
+function %I64_I16() { +ebb0: + [-,%rcx] v1 = iconst.i64 1 + [-,%rsi] v2 = iconst.i64 2 + [-,%r10] v3 = iconst.i64 3 + + [-,%rcx] v11 = ireduce.i16 v1 ; bin: + [-,%rsi] v12 = ireduce.i16 v2 ; bin: + [-,%r10] v13 = ireduce.i16 v3 ; bin: + + ; asm: movswq %cx, %rsi + [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f bf f1 + ; asm: movswq %si, %r10 + [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f bf d6 + ; asm: movswq %r10w, %rcx + [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f bf ca + + ; asm: movzwl %cx, %esi + [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b7 f1 + ; asm: movzwl %si, %r10d + [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b7 d6 + ; asm: movzwl %r10w, %ecx + [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b7 ca + + trap user0 ; bin: 0f 0b +} + ; Tests for i64/i32 conversion instructions. function %I64_I32() { ebb0: diff --git a/cranelift/filetests/isa/intel/legalize-mulhi.cton b/cranelift/filetests/isa/intel/legalize-mulhi.cton new file mode 100644 index 0000000000..673a19db3b --- /dev/null +++ b/cranelift/filetests/isa/intel/legalize-mulhi.cton @@ -0,0 +1,45 @@ + +test compile +set is_64bit +isa intel baseline + +; umulhi/smulhi on 64 bit operands + +function %i64_umulhi(i64, i64) -> i64 { +ebb0(v10: i64, v11: i64): + v12 = umulhi v10, v11 + ; check: %rdi -> %rax + ; check: x86_umulx + ; check: %rdx -> %rax + return v12 +} + +function %i64_smulhi(i64, i64) -> i64 { +ebb0(v20: i64, v21: i64): + v22 = smulhi v20, v21 + ; check: %rdi -> %rax + ; check: x86_smulx + ; check: %rdx -> %rax + return v22 +} + + +; umulhi/smulhi on 32 bit operands + +function %i32_umulhi(i32, i32) -> i32 { +ebb0(v30: i32, v31: i32): + v32 = umulhi v30, v31 + ; check: %rdi -> %rax + ; check: x86_umulx + ; check: %rdx -> %rax + return v32 +} + +function %i32_smulhi(i32, i32) -> i32 { +ebb0(v40: i32, v41: i32): + v42 = smulhi v40, v41 + ; check: %rdi -> %rax + ; check: x86_smulx + ; check: %rdx -> %rax + return v42 +} diff --git a/cranelift/filetests/isa/intel/prologue-epilogue.cton 
b/cranelift/filetests/isa/intel/prologue-epilogue.cton index 425b790fbf..fc53f72008 100644 --- a/cranelift/filetests/isa/intel/prologue-epilogue.cton +++ b/cranelift/filetests/isa/intel/prologue-epilogue.cton @@ -4,13 +4,13 @@ set is_compressed isa intel haswell function %foo() { - ss0 = local 168 + ss0 = explicit_slot 168 ebb0: return } ; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] native { -; nextln: ss0 = local 168, offset -224 +; nextln: ss0 = explicit_slot 168, offset -224 ; nextln: ss1 = incoming_arg 56, offset -56 ; check: ebb0(v0: i64 [%rbp], v1: i64 [%rbx], v2: i64 [%r12], v3: i64 [%r13], v4: i64 [%r14], v5: i64 [%r15]): ; nextln: x86_push v0 @@ -29,4 +29,4 @@ ebb0: ; nextln: v7 = x86_pop.i64 ; nextln: v6 = x86_pop.i64 ; nextln: return v6, v7, v8, v9, v10, v11 -; nextln: } \ No newline at end of file +; nextln: } diff --git a/cranelift/filetests/parser/tiny.cton b/cranelift/filetests/parser/tiny.cton index a5a6489808..0c619565a0 100644 --- a/cranelift/filetests/parser/tiny.cton +++ b/cranelift/filetests/parser/tiny.cton @@ -124,7 +124,7 @@ ebb0(v90: i32, v91: f32): ; Stack slot references function %stack() { ss10 = spill_slot 8 - ss2 = local 4 + ss2 = explicit_slot 4 ss3 = incoming_arg 4, offset 8 ss4 = outgoing_arg 4 ss5 = emergency_slot 4 @@ -136,7 +136,7 @@ ebb0: stack_store v2, ss2 } ; sameln: function %stack() native { -; check: ss2 = local 4 +; check: ss2 = explicit_slot 4 ; check: ss3 = incoming_arg 4, offset 8 ; check: ss4 = outgoing_arg 4 ; check: ss5 = emergency_slot 4 diff --git a/cranelift/filetests/preopt/div_by_const_indirect.cton b/cranelift/filetests/preopt/div_by_const_indirect.cton new file mode 100644 index 0000000000..ccc83cd49b --- /dev/null +++ b/cranelift/filetests/preopt/div_by_const_indirect.cton @@ -0,0 +1,60 @@ + +test preopt +isa intel baseline + +; Cases where 
the denominator is created by an iconst + +function %indir_udiv32(i32) -> i32 { +ebb0(v0: i32): + v1 = iconst.i32 7 + v2 = udiv v0, v1 + ; check: iconst.i32 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v3 + ; check: isub v0, v4 + ; check: ushr_imm v5, 1 + ; check: iadd v6, v4 + ; check: ushr_imm v7, 2 + ; check: copy v8 + return v2 +} + +function %indir_sdiv32(i32) -> i32 { +ebb0(v0: i32): + v1 = iconst.i32 -17 + v2 = sdiv v0, v1 + ; check: iconst.i32 -17 + ; check: iconst.i32 0xffff_ffff_8787_8787 + ; check: smulhi v0, v3 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: copy v7 + return v2 +} + +function %indir_udiv64(i64) -> i64 { +ebb0(v0: i64): + v1 = iconst.i64 1337 + v2 = udiv v0, v1 + ; check: iconst.i64 1337 + ; check: iconst.i64 0xc411_9d95_2866_a139 + ; check: umulhi v0, v3 + ; check: ushr_imm v4, 10 + ; check: copy v5 + return v2 +} + +function %indir_sdiv64(i64) -> i64 { +ebb0(v0: i64): + v1 = iconst.i64 -90210 + v2 = sdiv v0, v1 + ; check: iconst.i64 0xffff_ffff_fffe_9f9e + ; check: iconst.i64 0xd181_4ee8_939c_b8bb + ; check: smulhi v0, v3 + ; check: sshr_imm v4, 14 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: copy v7 + return v2 +} diff --git a/cranelift/filetests/preopt/div_by_const_non_power_of_2.cton b/cranelift/filetests/preopt/div_by_const_non_power_of_2.cton new file mode 100644 index 0000000000..18811fcd82 --- /dev/null +++ b/cranelift/filetests/preopt/div_by_const_non_power_of_2.cton @@ -0,0 +1,267 @@ + +test preopt +isa intel baseline + +; -------- U32 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_udiv32_p7(i32) -> i32 { +ebb0(v0: i32): + v1 = udiv_imm v0, 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: copy v7 + return v1 +} + +; simple case (mul, shift) +function %t_udiv32_p125(i32) -> i32 { +ebb0(v0: i32): + v1 = udiv_imm v0, 125 
+ ; check: iconst.i32 0x1062_4dd3 + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: copy v4 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_udiv32_p641(i32) -> i32 { +ebb0(v0: i32): + v1 = udiv_imm v0, 641 + ; check: iconst.i32 0x0066_3d81 + ; check: umulhi v0, v2 + ; check: copy v3 + return v1 +} + + +; -------- S32 -------- + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_sdiv32_n6(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, -6 + ; check: iconst.i32 0xffff_ffff_d555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: copy v5 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv32_n5(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, -5 + ; check: iconst.i32 0xffff_ffff_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: copy v6 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_sdiv32_n3(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, -3 + ; check: iconst.i32 0x5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: copy v7 + return v1 +} + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_sdiv32_p6(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, 6 + ; check: iconst.i32 0x2aaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: copy v5 + return v1 +} + +; case d > 0 && M < 0 (mull, add, shift, add-sign-bit) +function %t_sdiv32_p7(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, 7 + ; check: iconst.i32 0xffff_ffff_9249_2493 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 2 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: copy v7 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv32_p625(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, 625 + 
; check: iconst.i32 0x68db_8bad + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 8 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: copy v6 + return v1 +} + + +; -------- U64 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_udiv64_p7(i64) -> i64 { +ebb0(v0: i64): + v1 = udiv_imm v0, 7 + ; check: iconst.i64 0x2492_4924_9249_2493 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: copy v7 + return v1 +} + +; simple case (mul, shift) +function %t_udiv64_p9(i64) -> i64 { +ebb0(v0: i64): + v1 = udiv_imm v0, 9 + ; check: iconst.i64 0xe38e_38e3_8e38_e38f + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: copy v4 + return v1 +} + +; complex case (mul, sub, shift, add, shift) +function %t_udiv64_p125(i64) -> i64 { +ebb0(v0: i64): + v1 = udiv_imm v0, 125 + ; check: iconst.i64 0x0624_dd2f_1a9f_be77 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 6 + ; check: copy v7 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_udiv64_p274177(i64) -> i64 { +ebb0(v0: i64): + v1 = udiv_imm v0, 274177 + ; check: iconst.i64 0x3d30_f19c_d101 + ; check: umulhi v0, v2 + ; check: copy v3 + return v1 +} + + +; -------- S64 -------- + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv64_n625(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -625 + ; check: iconst.i64 0xcb92_3a29_c779_a6b5 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: copy v6 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_n6(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -6 + ; check: iconst.i64 0xd555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: copy v5 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_n5(i64) -> 
i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -5 + ; check: iconst.i64 0x9999_9999_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: copy v6 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_sdiv64_n3(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -3 + ; check: iconst.i64 0x5555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: copy v7 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_p6(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, 6 + ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: copy v5 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_sdiv64_p15(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, 15 + ; check: iconst.i64 0x8888_8888_8888_8889 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: copy v7 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv64_p625(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, 625 + ; check: iconst.i64 0x346d_c5d6_3886_594b + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: copy v6 + return v1 +} diff --git a/cranelift/filetests/preopt/div_by_const_power_of_2.cton b/cranelift/filetests/preopt/div_by_const_power_of_2.cton new file mode 100644 index 0000000000..dc51c5395d --- /dev/null +++ b/cranelift/filetests/preopt/div_by_const_power_of_2.cton @@ -0,0 +1,293 @@ + +test preopt +isa intel baseline + +; -------- U32 -------- + +; ignored +function %t_udiv32_p0(i32) -> i32 { +ebb0(v0: i32): + v1 = udiv_imm v0, 0 + ; check: udiv_imm v0, 0 + return v1 +} + +; converted to a copy +function %t_udiv32_p1(i32) -> i32 { 
+ebb0(v0: i32): + v1 = udiv_imm v0, 1 + ; check: copy v0 + return v1 +} + +; shift +function %t_udiv32_p2(i32) -> i32 { +ebb0(v0: i32): + v1 = udiv_imm v0, 2 + ; check: ushr_imm v0, 1 + return v1 +} + +; shift +function %t_udiv32_p2p31(i32) -> i32 { +ebb0(v0: i32): + v1 = udiv_imm v0, 0x8000_0000 + ; check: ushr_imm v0, 31 + return v1 +} + + +; -------- U64 -------- + +; ignored +function %t_udiv64_p0(i64) -> i64 { +ebb0(v0: i64): + v1 = udiv_imm v0, 0 + ; check: udiv_imm v0, 0 + return v1 +} + +; converted to a copy +function %t_udiv64_p1(i64) -> i64 { +ebb0(v0: i64): + v1 = udiv_imm v0, 1 + ; check: copy v0 + return v1 +} + +; shift +function %t_udiv64_p2(i64) -> i64 { +ebb0(v0: i64): + v1 = udiv_imm v0, 2 + ; check: ushr_imm v0, 1 + return v1 +} + +; shift +function %t_udiv64_p2p63(i64) -> i64 { +ebb0(v0: i64): + v1 = udiv_imm v0, 0x8000_0000_0000_0000 + ; check: ushr_imm v0, 63 + return v1 +} + + +; -------- S32 -------- + +; ignored +function %t_sdiv32_p0(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, 0 + ; check: sdiv_imm v0, 0 + return v1 +} + +; converted to a copy +function %t_sdiv32_p1(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, 1 + ; check: copy v0 + return v1 +} + +; ignored +function %t_sdiv32_n1(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, -1 + ; check: sdiv_imm v0, -1 + return v1 +} + +; shift +function %t_sdiv32_p2(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, 2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: copy v4 + return v1 +} + +; shift +function %t_sdiv32_n2(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, -2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: irsub_imm v4, 0 + return v1 +} + +; shift +function %t_sdiv32_p4(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, 4 + ; check: v2 = sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: copy v5 + + return v1 +} + +; shift +function 
%t_sdiv32_n4(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: irsub_imm v5, 0 + return v1 +} + +; shift +function %t_sdiv32_p2p30(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, 0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 30 + ; check: copy v5 + return v1 +} + +; shift +function %t_sdiv32_n2p30(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, -0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 30 + ; check: irsub_imm v5, 0 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000) isn't +; representable. +function %t_sdiv32_n2p31(i32) -> i32 { +ebb0(v0: i32): + v1 = sdiv_imm v0, -0x8000_0000 + ; check: sshr_imm v0, 30 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 31 + ; check: irsub_imm v5, 0 + return v1 +} + + +; -------- S64 -------- + +; ignored +function %t_sdiv64_p0(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, 0 + ; check: sdiv_imm v0, 0 + return v1 +} + +; converted to a copy +function %t_sdiv64_p1(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, 1 + ; check: copy v0 + return v1 +} + +; ignored +function %t_sdiv64_n1(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -1 + ; check: sdiv_imm v0, -1 + return v1 +} + +; shift +function %t_sdiv64_p2(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, 2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: copy v4 + return v1 +} + +; shift +function %t_sdiv64_n2(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: irsub_imm v4, 0 + return v1 +} + +; shift +function %t_sdiv64_p4(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 
+ ; check: sshr_imm v4, 2 + ; check: copy v5 + return v1 +} + +; shift +function %t_sdiv64_n4(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: irsub_imm v5, 0 + return v1 +} + +; shift +function %t_sdiv64_p2p62(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, 0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 62 + ; check: copy v5 + return v1 +} + +; shift +function %t_sdiv64_n2p62(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 62 + ; check: irsub_imm v5, 0 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't +; representable. +function %t_sdiv64_n2p63(i64) -> i64 { +ebb0(v0: i64): + v1 = sdiv_imm v0, -0x8000_0000_0000_0000 + ; check: sshr_imm v0, 62 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 63 + ; check: irsub_imm v5, 0 + return v1 +} diff --git a/cranelift/filetests/preopt/rem_by_const_non_power_of_2.cton b/cranelift/filetests/preopt/rem_by_const_non_power_of_2.cton new file mode 100644 index 0000000000..c142a16359 --- /dev/null +++ b/cranelift/filetests/preopt/rem_by_const_non_power_of_2.cton @@ -0,0 +1,286 @@ + +test preopt +isa intel baseline + +; -------- U32 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_urem32_p7(i32) -> i32 { +ebb0(v0: i32): + v1 = urem_imm v0, 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift) +function %t_urem32_p125(i32) -> i32 { +ebb0(v0: i32): + v1 = urem_imm v0, 125 + ; check: iconst.i32 0x1062_4dd3 + ; check: umulhi v0, v2 + ; 
check: ushr_imm v3, 3 + ; check: imul_imm v4, 125 + ; check: isub v0, v5 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_urem32_p641(i32) -> i32 { +ebb0(v0: i32): + v1 = urem_imm v0, 641 + ; check: iconst.i32 0x0066_3d81 + ; check: umulhi v0, v2 + ; check: imul_imm v3, 641 + ; check: isub v0, v4 + return v1 +} + + +; -------- S32 -------- + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_srem32_n6(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, -6 + ; check: iconst.i32 0xffff_ffff_d555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: imul_imm v5, -6 + ; check: isub v0, v6 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem32_n5(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, -5 + ; check: iconst.i32 0xffff_ffff_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: imul_imm v6, -5 + ; check: isub v0, v7 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_srem32_n3(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, -3 + ; check: iconst.i32 0x5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: imul_imm v7, -3 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_srem32_p6(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, 6 + ; check: iconst.i32 0x2aaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: imul_imm v5, 6 + ; check: isub v0, v6 + return v1 +} + +; case d > 0 && M < 0 (mull, add, shift, add-sign-bit) +function %t_srem32_p7(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, 7 + ; check: iconst.i32 0xffff_ffff_9249_2493 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 2 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: imul_imm v7, 7 + ; 
check: isub v0, v8 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem32_p625(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, 625 + ; check: iconst.i32 0x68db_8bad + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 8 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: imul_imm v6, 625 + ; check: isub v0, v7 + return v1 +} + + +; -------- U64 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_urem64_p7(i64) -> i64 { +ebb0(v0: i64): + v1 = urem_imm v0, 7 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift) +function %t_urem64_p9(i64) -> i64 { +ebb0(v0: i64): + v1 = urem_imm v0, 9 + ; check: iconst.i64 0xe38e_38e3_8e38_e38f + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: imul_imm v4, 9 + ; check: isub v0, v5 + return v1 +} + +; complex case (mul, sub, shift, add, shift) +function %t_urem64_p125(i64) -> i64 { +ebb0(v0: i64): + v1 = urem_imm v0, 125 + ; check: iconst.i64 0x0624_dd2f_1a9f_be77 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 6 + ; check: imul_imm v7, 125 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_urem64_p274177(i64) -> i64 { +ebb0(v0: i64): + v1 = urem_imm v0, 274177 + ; check: iconst.i64 0x3d30_f19c_d101 + ; check: umulhi v0, v2 + ; check: imul_imm v3, 0x0004_2f01 + ; check: isub v0, v4 + return v1 +} + + +; -------- S64 -------- + +; simple case (mul, shift, add-sign-bit) +function %t_srem64_n625(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -625 + ; check: iconst.i64 0xcb92_3a29_c779_a6b5 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, -625 + ; check: isub v0, v7 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) 
+function %t_srem64_n6(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -6 + ; check: iconst.i64 0xd555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: imul_imm v5, -6 + ; check: isub v0, v6 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_n5(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -5 + ; check: iconst.i64 0x9999_9999_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, -5 + ; check: isub v0, v7 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_srem64_n3(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -3 + ; check: iconst.i64 0x5555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: imul_imm v7, -3 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_p6(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, 6 + ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: imul_imm v5, 6 + ; check: isub v0, v6 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_srem64_p15(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, 15 + ; check: iconst.i64 0x8888_8888_8888_8889 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: imul_imm v7, 15 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem64_p625(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, 625 + ; check: iconst.i64 0x346d_c5d6_3886_594b + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, 625 + ; check: isub v0, v7 + return v1 +} diff --git 
a/cranelift/filetests/preopt/rem_by_const_power_of_2.cton b/cranelift/filetests/preopt/rem_by_const_power_of_2.cton new file mode 100644 index 0000000000..931623d2e7 --- /dev/null +++ b/cranelift/filetests/preopt/rem_by_const_power_of_2.cton @@ -0,0 +1,292 @@ + +test preopt +isa intel baseline + +; -------- U32 -------- + +; ignored +function %t_urem32_p0(i32) -> i32 { +ebb0(v0: i32): + v1 = urem_imm v0, 0 + ; check: urem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_urem32_p1(i32) -> i32 { +ebb0(v0: i32): + v1 = urem_imm v0, 1 + ; check: iconst.i32 0 + return v1 +} + +; shift +function %t_urem32_p2(i32) -> i32 { +ebb0(v0: i32): + v1 = urem_imm v0, 2 + ; check: band_imm v0, 1 + return v1 +} + +; shift +function %t_urem32_p2p31(i32) -> i32 { +ebb0(v0: i32): + v1 = urem_imm v0, 0x8000_0000 + ; check: band_imm v0, 0x7fff_ffff + return v1 +} + + +; -------- U64 -------- + +; ignored +function %t_urem64_p0(i64) -> i64 { +ebb0(v0: i64): + v1 = urem_imm v0, 0 + ; check: urem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_urem64_p1(i64) -> i64 { +ebb0(v0: i64): + v1 = urem_imm v0, 1 + ; check: iconst.i64 0 + return v1 +} + +; shift +function %t_urem64_p2(i64) -> i64 { +ebb0(v0: i64): + v1 = urem_imm v0, 2 + ; check: band_imm v0, 1 + return v1 +} + +; shift +function %t_urem64_p2p63(i64) -> i64 { +ebb0(v0: i64): + v1 = urem_imm v0, 0x8000_0000_0000_0000 + ; check: band_imm v0, 0x7fff_ffff_ffff_ffff + return v1 +} + + +; -------- S32 -------- + +; ignored +function %t_srem32_n1(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, -1 + ; check: srem_imm v0, -1 + return v1 +} + +; ignored +function %t_srem32_p0(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, 0 + ; check: srem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_srem32_p1(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, 1 + ; check: iconst.i32 0 + return v1 +} + +; shift +function %t_srem32_p2(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, 2 + ; 
check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem32_n2(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, -2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem32_p4(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_n4(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_p2p30(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, 0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_c000_0000 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_n2p30(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, -0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_c000_0000 + ; check: isub v0, v5 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000) isn't +; representable. 
+function %t_srem32_n2p31(i32) -> i32 { +ebb0(v0: i32): + v1 = srem_imm v0, -0x8000_0000 + ; check: sshr_imm v0, 30 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_8000_0000 + ; check: isub v0, v5 + return v1 +} + + +; -------- S64 -------- + +; ignored +function %t_srem64_n1(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -1 + ; check: srem_imm v0, -1 + return v1 +} + +; ignored +function %t_srem64_p0(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, 0 + ; check: srem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_srem64_p1(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, 1 + ; check: iconst.i64 0 + return v1 +} + +; shift +function %t_srem64_p2(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, 2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem64_n2(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem64_p4(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_n4(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_p2p62(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, 0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xc000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_n2p62(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + 
; check: band_imm v4, 0xc000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't +; representable. +function %t_srem64_n2p63(i64) -> i64 { +ebb0(v0: i64): + v1 = srem_imm v0, -0x8000_0000_0000_0000 + ; check: sshr_imm v0, 62 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: band_imm v4, 0x8000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} diff --git a/cranelift/format-all.sh b/cranelift/format-all.sh index 6ccac9c289..a99da7db09 100755 --- a/cranelift/format-all.sh +++ b/cranelift/format-all.sh @@ -1,10 +1,8 @@ #!/bin/bash +set -euo pipefail # Format all sources using rustfmt. -# Exit immediately on errors. -set -e - cd $(dirname "$0") # Make sure we can find rustfmt. diff --git a/cranelift/publish-all.sh b/cranelift/publish-all.sh index 144aa519f8..7d8b7f7f3c 100755 --- a/cranelift/publish-all.sh +++ b/cranelift/publish-all.sh @@ -1,11 +1,11 @@ #!/bin/bash -set -e +set -euo pipefail cd $(dirname "$0") -topdir=$(pwd) +topdir="$(pwd)" # All the cretonne-* crates have the same version number # The filecheck crate version is managed independently. -version="0.1.0" +version="0.3.4" # Update all of the Cargo.toml files. # @@ -16,9 +16,9 @@ for crate in . lib/*; do continue fi # Update the version number of this crate to $version. - sed -i "" -e "s/^version = .*/version = \"$version\"/" $crate/Cargo.toml + sed -i.bk -e "s/^version = .*/version = \"$version\"/" "$crate/Cargo.toml" # Update the required version number of any cretonne* dependencies. - sed -i "" -e "/^cretonne/s/version = \"[^\"]*\"/version = \"$version\"/" $crate/Cargo.toml + sed -i.bk -e "/^cretonne/s/version = \"[^\"]*\"/version = \"$version\"/" "$crate/Cargo.toml" done # Update our local Cargo.lock (not checked in). @@ -29,6 +29,10 @@ cargo update # # Note that libraries need to be published in topological order. 
+echo git commit -a -m "\"Bump version to $version"\" +echo git push for crate in filecheck cretonne frontend native reader wasm; do - echo cargo publish --manifest-path lib/$crate/Cargo.toml + echo cargo publish --manifest-path "lib/$crate/Cargo.toml" done +echo +echo Then, go to https://github.com/Cretonne/cretonne/releases/ and define a new release. diff --git a/cranelift/src/cat.rs b/cranelift/src/cat.rs index e8519b10d9..36ae818e2c 100644 --- a/cranelift/src/cat.rs +++ b/cranelift/src/cat.rs @@ -13,7 +13,7 @@ use filetest::subtest::{self, SubTest, Context, Result as STResult}; pub fn run(files: Vec) -> CommandResult { for (i, f) in files.into_iter().enumerate() { if i != 0 { - println!(""); + println!(); } cat_one(f)? } @@ -30,7 +30,7 @@ fn cat_one(filename: String) -> CommandResult { for (idx, func) in items.into_iter().enumerate() { if idx != 0 { - println!(""); + println!(); } print!("{}", func); } diff --git a/cranelift/src/compile.rs b/cranelift/src/compile.rs index ae0c87ad36..9a9644b1cf 100644 --- a/cranelift/src/compile.rs +++ b/cranelift/src/compile.rs @@ -6,9 +6,45 @@ use cton_reader::parse_test; use std::path::PathBuf; use cretonne::Context; use cretonne::settings::FlagsOrIsa; +use cretonne::{binemit, ir}; use std::path::Path; use utils::{pretty_error, read_to_string, parse_sets_and_isa}; +struct PrintRelocs { + flag_print: bool, +} + +impl binemit::RelocSink for PrintRelocs { + fn reloc_ebb( + &mut self, + where_: binemit::CodeOffset, + r: binemit::Reloc, + offset: binemit::CodeOffset, + ) { + if self.flag_print { + println!("reloc_ebb: {} {} at {}", r, offset, where_); + } + } + + fn reloc_external( + &mut self, + where_: binemit::CodeOffset, + r: binemit::Reloc, + name: &ir::ExternalName, + addend: binemit::Addend, + ) { + if self.flag_print { + println!("reloc_ebb: {} {} {} at {}", r, name, addend, where_); + } + } + + fn reloc_jt(&mut self, where_: binemit::CodeOffset, r: binemit::Reloc, jt: ir::JumpTable) { + if self.flag_print { + 
println!("reloc_ebb: {} {} at {}", r, jt, where_); + } + } +} + pub fn run( files: Vec, flag_print: bool, @@ -37,7 +73,7 @@ fn handle_module( let test_file = parse_test(&buffer).map_err(|e| format!("{}: {}", name, e))?; // If we have an isa from the command-line, use that. Otherwise if the - // file contins a unique isa, use that. + // file contains a unique isa, use that. let isa = if let Some(isa) = fisa.isa { isa } else if let Some(isa) = test_file.isa_spec.unique_isa() { @@ -49,12 +85,32 @@ fn handle_module( for (func, _) in test_file.functions { let mut context = Context::new(); context.func = func; - context.compile(isa).map_err(|err| { + let size = context.compile(isa).map_err(|err| { pretty_error(&context.func, Some(isa), err) })?; if flag_print { println!("{}", context.func.display(isa)); } + + // Encode the result as machine code. + let mut mem = Vec::new(); + let mut relocs = PrintRelocs { flag_print }; + mem.resize(size as usize, 0); + context.emit_to_memory(mem.as_mut_ptr(), &mut relocs, &*isa); + + if flag_print { + print!(".byte "); + let mut first = true; + for byte in &mem { + if first { + first = false; + } else { + print!(", "); + } + print!("{}", byte); + } + println!(); + } } Ok(()) diff --git a/cranelift/src/filetest/binemit.rs b/cranelift/src/filetest/binemit.rs index 550703f0bf..3c52a63b2b 100644 --- a/cranelift/src/filetest/binemit.rs +++ b/cranelift/src/filetest/binemit.rs @@ -126,7 +126,7 @@ impl SubTest for TestBinEmit { // Fix the stack frame layout so we can test spill/fill encodings. 
let min_offset = func.stack_slots .keys() - .map(|ss| func.stack_slots[ss].offset) + .map(|ss| func.stack_slots[ss].offset.unwrap()) .min(); func.stack_slots.frame_size = min_offset.map(|off| (-off) as u32); @@ -271,13 +271,12 @@ impl SubTest for TestBinEmit { "No encodings found for: {}", func.dfg.display_inst(inst, isa) )); - } else { - return Err(format!( + } + return Err(format!( "No matching encodings for {} in {}", func.dfg.display_inst(inst, isa), DisplayList(&encodings), )); - } } let have = sink.text.trim(); if have != want { diff --git a/cranelift/src/filetest/concurrent.rs b/cranelift/src/filetest/concurrent.rs index cb188577e0..a651c14071 100644 --- a/cranelift/src/filetest/concurrent.rs +++ b/cranelift/src/filetest/concurrent.rs @@ -119,7 +119,7 @@ fn worker_thread( loop { // Lock the mutex only long enough to extract a request. let Request(jobid, path) = match requests.lock().unwrap().recv() { - Err(..) => break, // TX end shuit down. exit thread. + Err(..) => break, // TX end shut down. exit thread. Ok(req) => req, }; diff --git a/cranelift/src/filetest/mod.rs b/cranelift/src/filetest/mod.rs index 2d2a9c6cac..5286307811 100644 --- a/cranelift/src/filetest/mod.rs +++ b/cranelift/src/filetest/mod.rs @@ -19,6 +19,7 @@ mod concurrent; mod domtree; mod legalizer; mod licm; +mod preopt; mod regalloc; mod runner; mod runone; @@ -64,6 +65,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result> { "domtree" => domtree::subtest(parsed), "legalizer" => legalizer::subtest(parsed), "licm" => licm::subtest(parsed), + "preopt" => preopt::subtest(parsed), "print-cfg" => print_cfg::subtest(parsed), "regalloc" => regalloc::subtest(parsed), "simple-gvn" => simple_gvn::subtest(parsed), diff --git a/cranelift/src/filetest/preopt.rs b/cranelift/src/filetest/preopt.rs new file mode 100644 index 0000000000..60d03f8207 --- /dev/null +++ b/cranelift/src/filetest/preopt.rs @@ -0,0 +1,50 @@ +//! Test command for testing the preopt pass. +//! +//! 
The resulting function is sent to `filecheck`. + +use cretonne::ir::Function; +use cretonne; +use cton_reader::TestCommand; +use filetest::subtest::{SubTest, Context, Result, run_filecheck}; +use std::borrow::Cow; +use std::fmt::Write; +use utils::pretty_error; + +struct TestPreopt; + +pub fn subtest(parsed: &TestCommand) -> Result> { + assert_eq!(parsed.command, "preopt"); + if !parsed.options.is_empty() { + Err(format!("No options allowed on {}", parsed)) + } else { + Ok(Box::new(TestPreopt)) + } +} + +impl SubTest for TestPreopt { + fn name(&self) -> Cow { + Cow::from("preopt") + } + + fn is_mutating(&self) -> bool { + true + } + + fn run(&self, func: Cow, context: &Context) -> Result<()> { + // Create a compilation context, and drop in the function. + let mut comp_ctx = cretonne::Context::new(); + comp_ctx.func = func.into_owned(); + let isa = context.isa.expect("preopt needs an ISA"); + + comp_ctx.flowgraph(); + comp_ctx.preopt(isa).map_err(|e| { + pretty_error(&comp_ctx.func, context.isa, Into::into(e)) + })?; + + let mut text = String::new(); + write!(&mut text, "{}", &comp_ctx.func).map_err( + |e| e.to_string(), + )?; + run_filecheck(&text, context) + } +} diff --git a/cranelift/src/filetest/runner.rs b/cranelift/src/filetest/runner.rs index 320db9d942..09ec3a19a9 100644 --- a/cranelift/src/filetest/runner.rs +++ b/cranelift/src/filetest/runner.rs @@ -221,7 +221,7 @@ impl TestRunner { } self.tests[jobid].state = State::Done(result); - // Rports jobs in order. + // Reports jobs in order. while self.report_job() { self.reported_tests += 1; } diff --git a/cranelift/src/print_cfg.rs b/cranelift/src/print_cfg.rs index f3b958be77..df624993ad 100644 --- a/cranelift/src/print_cfg.rs +++ b/cranelift/src/print_cfg.rs @@ -17,7 +17,7 @@ use utils::read_to_string; pub fn run(files: Vec) -> CommandResult { for (i, f) in files.into_iter().enumerate() { if i != 0 { - println!(""); + println!(); } print_cfg(f)? 
} @@ -100,7 +100,7 @@ fn print_cfg(filename: String) -> CommandResult { for (idx, func) in items.into_iter().enumerate() { if idx != 0 { - println!(""); + println!(); } print!("{}", CFGPrinter::new(&func)); } diff --git a/cranelift/test-all.sh b/cranelift/test-all.sh index a5b2e92066..2402eb7a39 100755 --- a/cranelift/test-all.sh +++ b/cranelift/test-all.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -euo pipefail # This is the top-level test script: # @@ -10,8 +11,9 @@ # # All tests run by this script should be passing at all times. -# Exit immediately on errors. -set -e +# Disable generation of .pyc files because they cause trouble for vendoring +# scripts, and this is a build step that isn't run very often anyway. +export PYTHONDONTWRITEBYTECODE=1 # Repository top-level directory. cd $(dirname "$0") diff --git a/cranelift/wasmtests/unreachable_code.wat b/cranelift/wasmtests/unreachable_code.wat new file mode 100644 index 0000000000..38c1a315ce --- /dev/null +++ b/cranelift/wasmtests/unreachable_code.wat @@ -0,0 +1,77 @@ +(module + (type (;0;) (func (param i32 i64 f64) (result f64))) + (type (;1;) (func)) + (type (;2;) (func (result f32))) + (type (;3;) (func (result f64))) + (type (;4;) (func (param f64 f64) (result f64))) + (type (;5;) (func (result i32))) + (func (result i32) + block (result i32) + unreachable + end + block + end + i32.clz + ) + (func (result i32) + loop (result i32) + unreachable + end + block + end + i32.clz + ) + (func (;0;) (type 5) (result i32) + nop + block (result i32) ;; label = @1 + block ;; label = @2 + block ;; label = @3 + nop + block ;; label = @4 + i32.const 1 + if ;; label = @5 + nop + block ;; label = @6 + nop + nop + loop (result i32) ;; label = @7 + nop + block (result i32) ;; label = @8 + nop + nop + block (result i32) ;; label = @9 + nop + unreachable + end + end + end + block (result i32) ;; label = @7 + block ;; label = @8 + nop + end + i32.const 0 + end + br_if 5 (;@1;) + drop + end + else + nop + end + nop + end + end + end + 
unreachable + end) + (func + block (result i32) + block (result i32) + i32.const 1 + br 1 + end + end + drop + ) + (table (;0;) 16 anyfunc) + (elem (i32.const 0)) +) diff --git a/lib/cretonne/Cargo.toml b/lib/cretonne/Cargo.toml index 09d3a7dd7b..89340e0a1a 100644 --- a/lib/cretonne/Cargo.toml +++ b/lib/cretonne/Cargo.toml @@ -1,12 +1,13 @@ [package] authors = ["The Cretonne Project Developers"] name = "cretonne" -version = "0.1.0" +version = "0.3.4" description = "Low-level code generator library" license = "Apache-2.0" documentation = "https://cretonne.readthedocs.io/" repository = "https://github.com/Cretonne/cretonne" readme = "README.md" +keywords = [ "compile", "compiler", "jit" ] build = "build.rs" [lib] diff --git a/lib/cretonne/build.rs b/lib/cretonne/build.rs index 4d1b0edf72..69be14de94 100644 --- a/lib/cretonne/build.rs +++ b/lib/cretonne/build.rs @@ -46,7 +46,7 @@ fn main() { let cur_dir = env::current_dir().expect("Can't access current working directory"); let crate_dir = cur_dir.as_path(); - // Make sure we rebuild is this build script changes. + // Make sure we rebuild if this build script changes. // I guess that won't happen if you have non-UTF8 bytes in your path names. // The `build.py` script prints out its own dependencies. println!( @@ -59,8 +59,11 @@ fn main() { let build_script = meta_dir.join("build.py"); // Launch build script with Python. We'll just find python in the path. + // Use -B to disable .pyc files, because they cause trouble for vendoring + // scripts, and this is a build step that isn't run very often anyway. 
let status = process::Command::new("python") .current_dir(crate_dir) + .arg("-B") .arg(build_script) .arg("--out-dir") .arg(out_dir) diff --git a/lib/cretonne/meta/base/instructions.py b/lib/cretonne/meta/base/instructions.py index 7200e45e7d..8ba1aa65fa 100644 --- a/lib/cretonne/meta/base/instructions.py +++ b/lib/cretonne/meta/base/instructions.py @@ -833,6 +833,26 @@ imul = Instruction( """, ins=(x, y), outs=a) +umulhi = Instruction( + 'umulhi', r""" + Unsigned integer multiplication, producing the high half of a + double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + """, + ins=(x, y), outs=a) + +smulhi = Instruction( + 'smulhi', """ + Signed integer multiplication, producing the high half of a + double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + """, + ins=(x, y), outs=a) + udiv = Instruction( 'udiv', r""" Unsigned integer division: :math:`a := \lfloor {x \over y} \rfloor`. @@ -1679,7 +1699,7 @@ fpromote = Instruction( This is an exact operation. Cretonne currently only supports two floating point formats - - :type:`f32` and :type:`f64`. This may change in the future. + - :type:`f32` and :type:`f64`. This may change in the future. The result type must have the same number of vector lanes as the input, and the result lanes must not have fewer bits than the input lanes. If @@ -1695,10 +1715,10 @@ fdemote = Instruction( by rounding to nearest, ties to even. Cretonne currently only supports two floating point formats - - :type:`f32` and :type:`f64`. This may change in the future. + - :type:`f32` and :type:`f64`. This may change in the future. The result type must have the same number of vector lanes as the input, - and the result lanes must not have more bits than the input lanes. If + and the result lanes must not have more bits than the input lanes. If the input and output types are the same, this is a no-op. 
""", ins=x, outs=a, constraints=WiderOrEq(Float, FloatTo)) diff --git a/lib/cretonne/meta/base/settings.py b/lib/cretonne/meta/base/settings.py index 8d0522af79..e8785fbc08 100644 --- a/lib/cretonne/meta/base/settings.py +++ b/lib/cretonne/meta/base/settings.py @@ -56,7 +56,11 @@ avoid_div_traps = BoolSetting( is_compressed = BoolSetting("Enable compressed instructions") enable_float = BoolSetting( - """Enable the use of floating-point instructions""", + """ + Enable the use of floating-point instructions + + Disabling use of floating-point instructions is not yet implemented. + """, default=True) enable_simd = BoolSetting( diff --git a/lib/cretonne/meta/cdsl/test_ti.py b/lib/cretonne/meta/cdsl/test_ti.py index b9b3e0b646..d3baa4d3c5 100644 --- a/lib/cretonne/meta/cdsl/test_ti.py +++ b/lib/cretonne/meta/cdsl/test_ti.py @@ -96,7 +96,7 @@ def check_concrete_typing_rtl(var_types, rtl): # type: (VarTyping, Rtl) -> None """ Check that a concrete type assignment var_types (Dict[Var, TypeVar]) is - valid for an Rtl rtl. Specifically check that: + valid for an Rtl rtl. Specifically check that: 1) For each Var v \in rtl, v is defined in var_types diff --git a/lib/cretonne/meta/cdsl/ti.py b/lib/cretonne/meta/cdsl/ti.py index b2673366be..bc2c16d5b1 100644 --- a/lib/cretonne/meta/cdsl/ti.py +++ b/lib/cretonne/meta/cdsl/ti.py @@ -322,7 +322,7 @@ class TypeEnv(object): # type: (TypeVar, TypeVar) -> None """ Record a that the free tv1 is part of the same equivalence class as - tv2. The canonical representative of the merged class is tv2's + tv2. The canonical representative of the merged class is tv2's cannonical representative. """ assert not tv1.is_derived @@ -364,9 +364,9 @@ class TypeEnv(object): # type: (TypeVar) -> int """ Get the rank of tv in the partial order. TVs directly associated with a - Var get their rank from the Var (see register()). Internally generated + Var get their rank from the Var (see register()). 
Internally generated non-derived TVs implicitly get the lowest rank (0). Derived variables - get their rank from their free typevar. Singletons have the highest + get their rank from their free typevar. Singletons have the highest rank. TVs associated with vars in a source pattern have a higher rank than TVs associted with temporary vars. """ @@ -381,7 +381,7 @@ class TypeEnv(object): def register(self, v): # type: (Var) -> None """ - Register a new Var v. This computes a rank for the associated TypeVar + Register a new Var v. This computes a rank for the associated TypeVar for v, which is used to impose a partial order on type variables. """ self.vars.add(v) @@ -848,7 +848,7 @@ def ti_def(definition, typ): def ti_rtl(rtl, typ): # type: (Rtl, TypeEnv) -> TypingOrError """ - Perform type inference on an Rtl in a starting type env typ. Return an + Perform type inference on an Rtl in a starting type env typ. Return an updated type environment or error. """ for (i, d) in enumerate(rtl.rtl): @@ -866,7 +866,7 @@ def ti_rtl(rtl, typ): def ti_xform(xform, typ): # type: (XForm, TypeEnv) -> TypingOrError """ - Perform type inference on an Rtl in a starting type env typ. Return an + Perform type inference on an Rtl in a starting type env typ. Return an updated type environment or error. """ typ_or_err = ti_rtl(xform.src, typ) diff --git a/lib/cretonne/meta/cdsl/xform.py b/lib/cretonne/meta/cdsl/xform.py index bad15e0245..607c1776a2 100644 --- a/lib/cretonne/meta/cdsl/xform.py +++ b/lib/cretonne/meta/cdsl/xform.py @@ -113,8 +113,8 @@ class Rtl(object): # type: (Rtl) -> None """ Given that there is only 1 possible concrete typing T for self, assign - a singleton TV with type t=T[v] for each Var v \in self. Its an error - to call this on an Rtl with more than 1 possible typing. This modifies + a singleton TV with type t=T[v] for each Var v \in self. Its an error + to call this on an Rtl with more than 1 possible typing. This modifies the Rtl in-place. 
""" from .ti import ti_rtl, TypeEnv diff --git a/lib/cretonne/meta/check.sh b/lib/cretonne/meta/check.sh index 655092e6cb..aa0e88ce1e 100755 --- a/lib/cretonne/meta/check.sh +++ b/lib/cretonne/meta/check.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -e +set -euo pipefail cd $(dirname "$0") runif() { diff --git a/lib/cretonne/meta/gen_instr.py b/lib/cretonne/meta/gen_instr.py index 2bd34a926c..ac8e338eda 100644 --- a/lib/cretonne/meta/gen_instr.py +++ b/lib/cretonne/meta/gen_instr.py @@ -211,7 +211,7 @@ def gen_instruction_data_impl(fmt): if f.has_value_list: fmt.line(n + ' { ref mut args, .. } => args,') fmt.line('_ => panic!("No value list: {:?}", self),') - fmt.line('assert!(args.is_empty(), "Value list already in use");') + fmt.line('debug_assert!(args.is_empty(), "Value list already in use");') fmt.line('*args = vlist;') diff --git a/lib/cretonne/meta/gen_legalizer.py b/lib/cretonne/meta/gen_legalizer.py index 51368fea44..04c48ade51 100644 --- a/lib/cretonne/meta/gen_legalizer.py +++ b/lib/cretonne/meta/gen_legalizer.py @@ -211,7 +211,7 @@ def unwrap_inst(iref, node, fmt): fmt.format('let typeof_{0} = pos.func.dfg.value_type({0});', v) # If the node has results, detach the values. - # Place the values in locals. + # Place the values in locals. replace_inst = False if len(node.defs) > 0: if node.defs == node.defs[0].dst_def.defs: @@ -348,7 +348,8 @@ def gen_xform(xform, fmt, type_sets): # Delete the original instruction if we didn't have an opportunity to # replace it. 
if not replace_inst: - fmt.line('assert_eq!(pos.remove_inst(), inst);') + fmt.line('let removed = pos.remove_inst();') + fmt.line('debug_assert_eq!(removed, inst);') fmt.line('return true;') diff --git a/lib/cretonne/meta/gen_settings.py b/lib/cretonne/meta/gen_settings.py index 0cd4e0e860..dfc2c2b70a 100644 --- a/lib/cretonne/meta/gen_settings.py +++ b/lib/cretonne/meta/gen_settings.py @@ -245,7 +245,7 @@ def gen_constructor(sgrp, parent, fmt): 'pub fn new({}) -> Flags {{'.format(args), '}'): fmt.line('let bvec = builder.state_for("{}");'.format(sgrp.name)) fmt.line('let mut bytes = [0; {}];'.format(sgrp.byte_size())) - fmt.line('assert_eq!(bvec.len(), {});'.format(sgrp.settings_size)) + fmt.line('debug_assert_eq!(bvec.len(), {});'.format(sgrp.settings_size)) with fmt.indented( 'for (i, b) in bvec.iter().enumerate() {', '}'): fmt.line('bytes[i] = *b;') diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 1f47a5da06..162caccf65 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -120,6 +120,9 @@ enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf) enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7) enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6) +enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5) +enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4) + enc_i32_i64(base.copy, r.umr, 0x89) enc_both(base.copy.b1, r.umr, 0x89) enc_i32_i64(base.regmove, r.rmov, 0x89) @@ -403,9 +406,55 @@ I64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6)) # Numerical conversions. -# Converting i64 to i32 is a no-op in 64-bit mode. +# Reducing an integer is a no-op. 
+I32.enc(base.ireduce.i8.i32, r.null, 0) +I32.enc(base.ireduce.i16.i32, r.null, 0) +I64.enc(base.ireduce.i8.i32, r.null, 0) +I64.enc(base.ireduce.i16.i32, r.null, 0) +I64.enc(base.ireduce.i8.i64, r.null, 0) +I64.enc(base.ireduce.i16.i64, r.null, 0) I64.enc(base.ireduce.i32.i64, r.null, 0) + +# TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending +# instructions for %al/%ax/%eax to %ax/%eax/%rax. + +# movsbl +I32.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe)) +I64.enc(base.sextend.i32.i8, *r.urm.rex(0x0f, 0xbe)) +I64.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe)) + +# movswl +I32.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf)) +I64.enc(base.sextend.i32.i16, *r.urm.rex(0x0f, 0xbf)) +I64.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf)) + +# movsbq +I64.enc(base.sextend.i64.i8, *r.urm.rex(0x0f, 0xbe, w=1)) + +# movswq +I64.enc(base.sextend.i64.i16, *r.urm.rex(0x0f, 0xbf, w=1)) + +# movslq I64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1)) + +# movzbl +I32.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6)) +I64.enc(base.uextend.i32.i8, *r.urm.rex(0x0f, 0xb6)) +I64.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6)) + +# movzwl +I32.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7)) +I64.enc(base.uextend.i32.i16, *r.urm.rex(0x0f, 0xb7)) +I64.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7)) + +# movzbq, encoded as movzbl because it's equivalent and shorter +I64.enc(base.uextend.i64.i8, *r.urm.rex(0x0f, 0xb6)) +I64.enc(base.uextend.i64.i8, *r.urm(0x0f, 0xb6)) + +# movzwq, encoded as movzwl because it's equivalent and shorter +I64.enc(base.uextend.i64.i16, *r.urm.rex(0x0f, 0xb7)) +I64.enc(base.uextend.i64.i16, *r.urm(0x0f, 0xb7)) + # A 32-bit register copy clears the high 32 bits. 
I64.enc(base.uextend.i64.i32, *r.umr.rex(0x89)) I64.enc(base.uextend.i64.i32, *r.umr(0x89)) diff --git a/lib/cretonne/meta/isa/intel/instructions.py b/lib/cretonne/meta/isa/intel/instructions.py index 277cf62b4a..1fe75ff984 100644 --- a/lib/cretonne/meta/isa/intel/instructions.py +++ b/lib/cretonne/meta/isa/intel/instructions.py @@ -47,6 +47,28 @@ sdivmodx = Instruction( """, ins=(nlo, nhi, d), outs=(q, r), can_trap=True) +argL = Operand('argL', iWord) +argR = Operand('argR', iWord) +resLo = Operand('resLo', iWord) +resHi = Operand('resHi', iWord) + +umulx = Instruction( + 'x86_umulx', r""" + Unsigned integer multiplication, producing a double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + """, + ins=(argL, argR), outs=(resLo, resHi)) + +smulx = Instruction( + 'x86_smulx', r""" + Signed integer multiplication, producing a double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + """, + ins=(argL, argR), outs=(resLo, resHi)) Float = TypeVar( 'Float', 'A scalar or vector floating point number', @@ -132,7 +154,7 @@ rflags = Operand('rflags', iflags) bsr = Instruction( 'x86_bsr', r""" Bit Scan Reverse -- returns the bit-index of the most significant 1 - in the word. Result is undefined if the argument is zero. However, it + in the word. Result is undefined if the argument is zero. However, it sets the Z flag depending on the argument, so it is at least easy to detect and handle that case. @@ -144,7 +166,7 @@ bsr = Instruction( bsf = Instruction( 'x86_bsf', r""" Bit Scan Forwards -- returns the bit-index of the least significant 1 - in the word. Is otherwise identical to 'bsr', just above. + in the word. Is otherwise identical to 'bsr', just above. 
""", ins=x, outs=(y, rflags)) diff --git a/lib/cretonne/meta/isa/intel/legalize.py b/lib/cretonne/meta/isa/intel/legalize.py index 32f0a98153..5806bb9284 100644 --- a/lib/cretonne/meta/isa/intel/legalize.py +++ b/lib/cretonne/meta/isa/intel/legalize.py @@ -37,6 +37,23 @@ intel_expand.custom_legalize(insts.srem, 'expand_sdivrem') intel_expand.custom_legalize(insts.udiv, 'expand_udivrem') intel_expand.custom_legalize(insts.urem, 'expand_udivrem') +# +# Double length (widening) multiplication +# +resLo = Var('resLo') +resHi = Var('resHi') +intel_expand.legalize( + resHi << insts.umulhi(x, y), + Rtl( + (resLo, resHi) << x86.umulx(x, y) + )) + +intel_expand.legalize( + resHi << insts.smulhi(x, y), + Rtl( + (resLo, resHi) << x86.smulx(x, y) + )) + # Floating point condition codes. # # The 8 condition codes in `supported_floatccs` are directly supported by a diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index 965bc4331d..9d03d02053 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -453,6 +453,15 @@ div = TailRecipe( modrm_r_bits(in_reg2, bits, sink); ''') +# XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) +mulx = TailRecipe( + 'mulx', Binary, size=1, + ins=(GPR.rax, GPR), outs=(GPR.rax, GPR.rdx), + emit=''' + PUT_OP(bits, rex1(in_reg1), sink); + modrm_r_bits(in_reg1, bits, sink); + ''') + # XX /n ib with 8-bit immediate sign-extended. rib = TailRecipe( 'rib', BinaryImm, size=2, ins=GPR, outs=0, @@ -675,7 +684,7 @@ st_abcd = TailRecipe( # XX /r register-indirect store of FPR with no offset. 
fst = TailRecipe( - 'fst', Store, size=1, ins=(FPR, GPR), outs=(), + 'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(), instp=IsEqual(Store.offset, 0), clobbers_flags=False, emit=''' diff --git a/lib/cretonne/meta/isa/intel/settings.py b/lib/cretonne/meta/isa/intel/settings.py index 5817c48c0e..c62012e0c1 100644 --- a/lib/cretonne/meta/isa/intel/settings.py +++ b/lib/cretonne/meta/isa/intel/settings.py @@ -11,9 +11,6 @@ ISA.settings = SettingGroup('intel', parent=shared.group) # The has_* settings here correspond to CPUID bits. -# CPUID.01H:EDX -has_sse2 = BoolSetting("SSE2: CPUID.01H:EDX.SSE2[bit 26]") - # CPUID.01H:ECX has_sse3 = BoolSetting("SSE3: CPUID.01H:ECX.SSE3[bit 0]") has_ssse3 = BoolSetting("SSSE3: CPUID.01H:ECX.SSSE3[bit 9]") @@ -40,9 +37,9 @@ use_lzcnt = And(has_lzcnt) # Presets corresponding to Intel CPUs. -baseline = Preset(has_sse2) +baseline = Preset() nehalem = Preset( - has_sse2, has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt) + has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt) haswell = Preset(nehalem, has_bmi1, has_lzcnt) ISA.settings.close(globals()) diff --git a/lib/cretonne/meta/isa/riscv/__init__.py b/lib/cretonne/meta/isa/riscv/__init__.py index cf61cbdcf5..f40086414d 100644 --- a/lib/cretonne/meta/isa/riscv/__init__.py +++ b/lib/cretonne/meta/isa/riscv/__init__.py @@ -2,7 +2,7 @@ RISC-V Target ------------- -`RISC-V `_ is an open instruction set architecture +`RISC-V `_ is an open instruction set architecture originally developed at UC Berkeley. 
It is a RISC-style ISA with either a 32-bit (RV32I) or 64-bit (RV32I) base instruction set and a number of optional extensions: diff --git a/lib/cretonne/meta/semantics/__init__.py b/lib/cretonne/meta/semantics/__init__.py index 94e32b652c..8a55b1d595 100644 --- a/lib/cretonne/meta/semantics/__init__.py +++ b/lib/cretonne/meta/semantics/__init__.py @@ -17,7 +17,7 @@ def verify_semantics(inst, src, xforms): # type: (Instruction, Rtl, InstructionSemantics) -> None """ Verify that the semantics transforms in xforms correctly describe the - instruction described by the src Rtl. This involves checking that: + instruction described by the src Rtl. This involves checking that: 0) src is a single instance of inst 1) For all x\in xforms x.src is a single instance of inst 2) For any concrete values V of Literals in inst: diff --git a/lib/cretonne/src/abi.rs b/lib/cretonne/src/abi.rs index 45840d63db..b69821e657 100644 --- a/lib/cretonne/src/abi.rs +++ b/lib/cretonne/src/abi.rs @@ -12,7 +12,7 @@ use std::vec::Vec; /// /// An argument may go through a sequence of legalization steps before it reaches the final /// `Assign` action. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum ArgAction { /// Assign the argument to the given location. Assign(ArgumentLoc), @@ -151,7 +151,7 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion { match have_bits.cmp(&arg_bits) { // We have fewer bits than the ABI argument. Ordering::Less => { - assert!( + debug_assert!( have.is_int() && arg.value_type.is_int(), "Can only extend integer values" ); @@ -164,8 +164,8 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion { // We have the same number of bits as the argument. Ordering::Equal => { // This must be an integer vector that is split and then extended. 
- assert!(arg.value_type.is_int()); - assert!(have.is_vector()); + debug_assert!(arg.value_type.is_int()); + debug_assert!(have.is_vector()); ValueConversion::VectorSplit } // We have more bits than the argument. diff --git a/lib/cretonne/src/bforest/node.rs b/lib/cretonne/src/bforest/node.rs index 779554f0c8..d5c46f54bd 100644 --- a/lib/cretonne/src/bforest/node.rs +++ b/lib/cretonne/src/bforest/node.rs @@ -54,8 +54,8 @@ impl Clone for NodeData { impl NodeData { /// Is this a free/unused node? pub fn is_free(&self) -> bool { - match self { - &NodeData::Free { .. } => true, + match *self { + NodeData::Free { .. } => true, _ => false, } } @@ -65,10 +65,10 @@ impl NodeData { /// This is the number of outgoing edges in an inner node, or the number of key-value pairs in /// a leaf node. pub fn entries(&self) -> usize { - match self { - &NodeData::Inner { size, .. } => usize::from(size) + 1, - &NodeData::Leaf { size, .. } => usize::from(size), - &NodeData::Free { .. } => panic!("freed node"), + match *self { + NodeData::Inner { size, .. } => usize::from(size) + 1, + NodeData::Leaf { size, .. } => usize::from(size), + NodeData::Free { .. } => panic!("freed node"), } } @@ -96,8 +96,8 @@ impl NodeData { /// Unwrap an inner node into two slices (keys, trees). pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) { - match self { - &NodeData::Inner { + match *self { + NodeData::Inner { size, ref keys, ref tree, @@ -113,8 +113,8 @@ impl NodeData { /// Unwrap a leaf node into two slices (keys, values) of the same length. pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) { - match self { - &NodeData::Leaf { + match *self { + NodeData::Leaf { size, ref keys, ref vals, @@ -132,8 +132,8 @@ impl NodeData { /// Unwrap a mutable leaf node into two slices (keys, values) of the same length. 
pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) { - match self { - &mut NodeData::Leaf { + match *self { + NodeData::Leaf { size, ref mut keys, ref mut vals, @@ -152,8 +152,8 @@ impl NodeData { /// Get the critical key for a leaf node. /// This is simply the first key. pub fn leaf_crit_key(&self) -> F::Key { - match self { - &NodeData::Leaf { size, ref keys, .. } => { + match *self { + NodeData::Leaf { size, ref keys, .. } => { debug_assert!(size > 0, "Empty leaf node"); keys.borrow()[0] } @@ -165,8 +165,8 @@ impl NodeData { /// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`. /// If the node is full, this leaves the node unchanged and returns false. pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool { - match self { - &mut NodeData::Inner { + match *self { + NodeData::Inner { ref mut size, ref mut keys, ref mut tree, @@ -191,8 +191,8 @@ impl NodeData { /// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node /// is full. pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool { - match self { - &mut NodeData::Leaf { + match *self { + NodeData::Leaf { ref mut size, ref mut keys, ref mut vals, @@ -222,8 +222,8 @@ impl NodeData { /// The `insert_index` parameter is the position where an insertion was tried and failed. The /// node will be split in half with a bias towards an even split after the insertion is retried. pub fn split(&mut self, insert_index: usize) -> SplitOff { - match self { - &mut NodeData::Inner { + match *self { + NodeData::Inner { ref mut size, ref keys, ref tree, @@ -262,7 +262,7 @@ impl NodeData { }, } } - &mut NodeData::Leaf { + NodeData::Leaf { ref mut size, ref keys, ref vals, @@ -307,8 +307,8 @@ impl NodeData { /// /// Return an indication of the node's health (i.e. below half capacity). 
pub fn inner_remove(&mut self, index: usize) -> Removed { - match self { - &mut NodeData::Inner { + match *self { + NodeData::Inner { ref mut size, ref mut keys, ref mut tree, @@ -332,8 +332,8 @@ impl NodeData { /// /// Return an indication of the node's health (i.e. below half capacity). pub fn leaf_remove(&mut self, index: usize) -> Removed { - match self { - &mut NodeData::Leaf { + match *self { + NodeData::Leaf { ref mut size, ref mut keys, ref mut vals, @@ -553,15 +553,15 @@ where F::Value: ValDisp, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - &NodeData::Inner { size, keys, tree } => { + match *self { + NodeData::Inner { size, keys, tree } => { write!(f, "[ {}", tree[0])?; for i in 0..usize::from(size) { write!(f, " {} {}", keys[i], tree[i + 1])?; } write!(f, " ]") } - &NodeData::Leaf { size, keys, vals } => { + NodeData::Leaf { size, keys, vals } => { let keys = keys.borrow(); let vals = vals.borrow(); write!(f, "[")?; @@ -571,8 +571,8 @@ where } write!(f, " ]") } - &NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n), - &NodeData::Free { next: None } => write!(f, "[ free ]"), + NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n), + NodeData::Free { next: None } => write!(f, "[ free ]"), } } } diff --git a/lib/cretonne/src/bforest/path.rs b/lib/cretonne/src/bforest/path.rs index 3393014fee..1add8f6e63 100644 --- a/lib/cretonne/src/bforest/path.rs +++ b/lib/cretonne/src/bforest/path.rs @@ -285,7 +285,7 @@ impl Path { fn split_and_insert(&mut self, mut key: F::Key, value: F::Value, pool: &mut NodePool) { let orig_root = self.node[0]; - // Loop invariant: We need to split the node at `level` and then retry a failed insertion. + // Loop invariant: We need to split the node at `level` and then retry a failed insertion. // The items to insert are either `(key, ins_node)` or `(key, value)`. 
let mut ins_node = None; let mut split; @@ -316,7 +316,8 @@ impl Path { // Now that we have a not-full node, it must be possible to insert. match ins_node { None => { - assert!(pool[node].try_leaf_insert(entry, key, value)); + let inserted = pool[node].try_leaf_insert(entry, key, value); + debug_assert!(inserted); // If we inserted at the front of the new rhs_node leaf, we need to propagate // the inserted key as the critical key instead of the previous front key. if entry == 0 && node == rhs_node { @@ -324,7 +325,8 @@ impl Path { } } Some(n) => { - assert!(pool[node].try_inner_insert(entry, key, n)); + let inserted = pool[node].try_inner_insert(entry, key, n); + debug_assert!(inserted); // The lower level was moved to the new RHS node, so make sure that is // reflected here. if n == self.node[level + 1] { diff --git a/lib/cretonne/src/binemit/mod.rs b/lib/cretonne/src/binemit/mod.rs index 713d25bed5..bca1f6fa4d 100644 --- a/lib/cretonne/src/binemit/mod.rs +++ b/lib/cretonne/src/binemit/mod.rs @@ -110,7 +110,7 @@ where let mut divert = RegDiversions::new(); for ebb in func.layout.ebbs() { divert.clear(); - assert_eq!(func.offsets[ebb], sink.offset()); + debug_assert_eq!(func.offsets[ebb], sink.offset()); for inst in func.layout.ebb_insts(ebb) { emit_inst(func, inst, &mut divert, sink); } diff --git a/lib/cretonne/src/binemit/relaxation.rs b/lib/cretonne/src/binemit/relaxation.rs index 94835b736e..b0bf6196c6 100644 --- a/lib/cretonne/src/binemit/relaxation.rs +++ b/lib/cretonne/src/binemit/relaxation.rs @@ -60,7 +60,7 @@ pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> Result { // Somebody used a fall-through instruction before the branch relaxation pass. // Make sure it is correct, i.e. the destination is the layout successor. 
- assert_eq!(destination, succ, "Illegal fall-through in {}", ebb) + debug_assert_eq!(destination, succ, "Illegal fall-through in {}", ebb) } Opcode::Jump => { // If this is a jump to the successor EBB, change it to a fall-through. @@ -152,13 +152,23 @@ fn relax_branch( if let Some(enc) = isa.legal_encodings(dfg, &dfg[inst], ctrl_type).find( |&enc| { let range = encinfo.branch_range(enc).expect("Branch with no range"); - let in_range = range.contains(offset, dest_offset); - dbg!( - " trying [{}]: {}", - encinfo.display(enc), - if in_range { "OK" } else { "out of range" } - ); - in_range + if !range.contains(offset, dest_offset) { + dbg!(" trying [{}]: out of range", encinfo.display(enc)); + false + } else if encinfo.operand_constraints(enc) != + encinfo.operand_constraints(cur.func.encodings[inst]) + { + // Conservatively give up if the encoding has different constraints + // than the original, so that we don't risk picking a new encoding + // which the existing operands don't satisfy. We can't check for + // validity directly because we don't have a RegDiversions active so + // we don't know which registers are actually in use. 
+ dbg!(" trying [{}]: constraints differ", encinfo.display(enc)); + false + } else { + dbg!(" trying [{}]: OK", encinfo.display(enc)); + true + } }, ) { diff --git a/lib/cretonne/src/bitset.rs b/lib/cretonne/src/bitset.rs index 62e0286bb3..f212790be0 100644 --- a/lib/cretonne/src/bitset.rs +++ b/lib/cretonne/src/bitset.rs @@ -36,8 +36,8 @@ where /// Check if this BitSet contains the number num pub fn contains(&self, num: u8) -> bool { - assert!((num as usize) < Self::bits()); - assert!((num as usize) < Self::max_bits()); + debug_assert!((num as usize) < Self::bits()); + debug_assert!((num as usize) < Self::max_bits()); self.0.into() & (1 << num) != 0 } @@ -62,8 +62,8 @@ where /// Construct a BitSet with the half-open range [lo,hi) filled in pub fn from_range(lo: u8, hi: u8) -> Self { - assert!(lo <= hi); - assert!((hi as usize) <= Self::bits()); + debug_assert!(lo <= hi); + debug_assert!((hi as usize) <= Self::bits()); let one: T = T::from(1); // I can't just do (one << hi) - one here as the shift may overflow let hi_rng = if hi >= 1 { diff --git a/lib/cretonne/src/context.rs b/lib/cretonne/src/context.rs index 8aff5aa706..0eb7ff5c05 100644 --- a/lib/cretonne/src/context.rs +++ b/lib/cretonne/src/context.rs @@ -18,11 +18,12 @@ use isa::TargetIsa; use legalize_function; use regalloc; use result::{CtonError, CtonResult}; -use settings::FlagsOrIsa; +use settings::{FlagsOrIsa, OptLevel}; use unreachable_code::eliminate_unreachable_code; use verifier; use simple_gvn::do_simple_gvn; use licm::do_licm; +use preopt::do_preopt; use timing; /// Persistent data structures and compilation pipeline. @@ -87,15 +88,14 @@ impl Context { self.verify_if(isa)?; self.compute_cfg(); + self.preopt(isa)?; self.legalize(isa)?; - /* TODO: Enable additional optimization passes. 
if isa.flags().opt_level() == OptLevel::Best { self.compute_domtree(); self.compute_loop_analysis(); self.licm(isa)?; self.simple_gvn(isa)?; } - */ self.compute_domtree(); self.eliminate_unreachable_code(isa)?; self.regalloc(isa)?; @@ -131,6 +131,27 @@ impl Context { } } + /// Run the locations verifier on the function. + pub fn verify_locations<'a>(&self, isa: &TargetIsa) -> verifier::Result { + verifier::verify_locations(isa, &self.func, None) + } + + /// Run the locations verifier only if the `enable_verifier` setting is true. + pub fn verify_locations_if<'a>(&self, isa: &TargetIsa) -> CtonResult { + if isa.flags().enable_verifier() { + self.verify_locations(isa).map_err(Into::into) + } else { + Ok(()) + } + } + + /// Perform pre-legalization rewrites on the function. + pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult { + do_preopt(&mut self.func); + self.verify_if(isa)?; + Ok(()) + } + /// Run the legalizer for `isa` on the function. pub fn legalize(&mut self, isa: &TargetIsa) -> CtonResult { // Legalization invalidates the domtree and loop_analysis by mutating the CFG. @@ -205,13 +226,16 @@ impl Context { /// Insert prologue and epilogues after computing the stack frame layout. pub fn prologue_epilogue(&mut self, isa: &TargetIsa) -> CtonResult { isa.prologue_epilogue(&mut self.func)?; - self.verify_if(isa) + self.verify_if(isa)?; + self.verify_locations_if(isa)?; + Ok(()) } /// Run the branch relaxation pass and return the final code size. pub fn relax_branches(&mut self, isa: &TargetIsa) -> Result { let code_size = relax_branches(&mut self.func, isa)?; self.verify_if(isa)?; + self.verify_locations_if(isa)?; Ok(code_size) } diff --git a/lib/cretonne/src/cursor.rs b/lib/cretonne/src/cursor.rs index 56ad657043..ed2528d5be 100644 --- a/lib/cretonne/src/cursor.rs +++ b/lib/cretonne/src/cursor.rs @@ -256,7 +256,7 @@ pub trait Cursor { /// Go to a specific instruction which must be inserted in the layout. /// New instructions will be inserted before `inst`. 
fn goto_inst(&mut self, inst: ir::Inst) { - assert!(self.layout().inst_ebb(inst).is_some()); + debug_assert!(self.layout().inst_ebb(inst).is_some()); self.set_position(CursorPosition::At(inst)); } @@ -287,14 +287,14 @@ pub trait Cursor { /// At this position, instructions cannot be inserted, but `next_inst()` will move to the first /// instruction in `ebb`. fn goto_top(&mut self, ebb: ir::Ebb) { - assert!(self.layout().is_ebb_inserted(ebb)); + debug_assert!(self.layout().is_ebb_inserted(ebb)); self.set_position(CursorPosition::Before(ebb)); } /// Go to the bottom of `ebb` which must be inserted into the layout. /// At this position, inserted instructions will be appended to `ebb`. fn goto_bottom(&mut self, ebb: ir::Ebb) { - assert!(self.layout().is_ebb_inserted(ebb)); + debug_assert!(self.layout().is_ebb_inserted(ebb)); self.set_position(CursorPosition::After(ebb)); } diff --git a/lib/cretonne/src/divconst_magic_numbers.rs b/lib/cretonne/src/divconst_magic_numbers.rs new file mode 100644 index 0000000000..b046e22411 --- /dev/null +++ b/lib/cretonne/src/divconst_magic_numbers.rs @@ -0,0 +1,542 @@ +//! Compute "magic numbers" for division-by-constants transformations. + +#![allow(non_snake_case)] + +//---------------------------------------------------------------------- +// +// Math helpers for division by (non-power-of-2) constants. This is based +// on the presentation in "Hacker's Delight" by Henry Warren, 2003. There +// are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size +// makes little difference, but the signed-vs-unsigned aspect has a large +// effect. Therefore everything is presented in the order U32 U64 S32 S64 +// so as to emphasise the similarity of the U32 and U64 cases and the S32 +// and S64 cases. + +// Structures to hold the "magic numbers" computed. 
+ +#[derive(PartialEq, Debug)] +pub struct MU32 { + pub mulBy: u32, + pub doAdd: bool, + pub shiftBy: i32, +} + +#[derive(PartialEq, Debug)] +pub struct MU64 { + pub mulBy: u64, + pub doAdd: bool, + pub shiftBy: i32, +} + +#[derive(PartialEq, Debug)] +pub struct MS32 { + pub mulBy: i32, + pub shiftBy: i32, +} + +#[derive(PartialEq, Debug)] +pub struct MS64 { + pub mulBy: i64, + pub shiftBy: i32, +} + +// The actual "magic number" generators follow. + +pub fn magicU32(d: u32) -> MU32 { + debug_assert_ne!(d, 0); + debug_assert_ne!(d, 1); // d==1 generates out of range shifts. + + let mut do_add: bool = false; + let mut p: i32 = 31; + let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d; + let mut q1: u32 = 0x80000000u32 / nc; + let mut r1: u32 = 0x80000000u32 - q1 * nc; + let mut q2: u32 = 0x7FFFFFFFu32 / d; + let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d; + loop { + p = p + 1; + if r1 >= nc - r1 { + q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1); + r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc); + } else { + q1 = 2 * q1; + r1 = 2 * r1; + } + if r2 + 1 >= d - r2 { + if q2 >= 0x7FFFFFFFu32 { + do_add = true; + } + q2 = 2 * q2 + 1; + r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d); + } else { + if q2 >= 0x80000000u32 { + do_add = true; + } + q2 = u32::wrapping_mul(2, q2); + r2 = 2 * r2 + 1; + } + let delta: u32 = d - 1 - r2; + if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) { + break; + } + } + + MU32 { + mulBy: q2 + 1, + doAdd: do_add, + shiftBy: p - 32, + } +} + +pub fn magicU64(d: u64) -> MU64 { + debug_assert_ne!(d, 0); + debug_assert_ne!(d, 1); // d==1 generates out of range shifts. 
+ + let mut do_add: bool = false; + let mut p: i32 = 63; + let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d; + let mut q1: u64 = 0x8000000000000000u64 / nc; + let mut r1: u64 = 0x8000000000000000u64 - q1 * nc; + let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d; + let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d; + loop { + p = p + 1; + if r1 >= nc - r1 { + q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1); + r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc); + } else { + q1 = 2 * q1; + r1 = 2 * r1; + } + if r2 + 1 >= d - r2 { + if q2 >= 0x7FFFFFFFFFFFFFFFu64 { + do_add = true; + } + q2 = 2 * q2 + 1; + r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d); + } else { + if q2 >= 0x8000000000000000u64 { + do_add = true; + } + q2 = u64::wrapping_mul(2, q2); + r2 = 2 * r2 + 1; + } + let delta: u64 = d - 1 - r2; + if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) { + break; + } + } + + MU64 { + mulBy: q2 + 1, + doAdd: do_add, + shiftBy: p - 64, + } +} + +pub fn magicS32(d: i32) -> MS32 { + debug_assert_ne!(d, -1); + debug_assert_ne!(d, 0); + debug_assert_ne!(d, 1); + let two31: u32 = 0x80000000u32; + let mut p: i32 = 31; + let ad: u32 = i32::wrapping_abs(d) as u32; + let t: u32 = two31 + ((d as u32) >> 31); + let anc: u32 = u32::wrapping_sub(t - 1, t % ad); + let mut q1: u32 = two31 / anc; + let mut r1: u32 = two31 - q1 * anc; + let mut q2: u32 = two31 / ad; + let mut r2: u32 = two31 - q2 * ad; + loop { + p = p + 1; + q1 = 2 * q1; + r1 = 2 * r1; + if r1 >= anc { + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = 2 * q2; + r2 = 2 * r2; + if r2 >= ad { + q2 = q2 + 1; + r2 = r2 - ad; + } + let delta: u32 = ad - r2; + if !(q1 < delta || (q1 == delta && r1 == 0)) { + break; + } + } + + MS32 { + mulBy: (if d < 0 { + u32::wrapping_neg(q2 + 1) + } else { + q2 + 1 + }) as i32, + shiftBy: p - 32, + } +} + +pub fn magicS64(d: i64) -> MS64 { + debug_assert_ne!(d, -1); + debug_assert_ne!(d, 0); + debug_assert_ne!(d, 1); + let two63: u64 = 
0x8000000000000000u64; + let mut p: i32 = 63; + let ad: u64 = i64::wrapping_abs(d) as u64; + let t: u64 = two63 + ((d as u64) >> 63); + let anc: u64 = u64::wrapping_sub(t - 1, t % ad); + let mut q1: u64 = two63 / anc; + let mut r1: u64 = two63 - q1 * anc; + let mut q2: u64 = two63 / ad; + let mut r2: u64 = two63 - q2 * ad; + loop { + p = p + 1; + q1 = 2 * q1; + r1 = 2 * r1; + if r1 >= anc { + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = 2 * q2; + r2 = 2 * r2; + if r2 >= ad { + q2 = q2 + 1; + r2 = r2 - ad; + } + let delta: u64 = ad - r2; + if !(q1 < delta || (q1 == delta && r1 == 0)) { + break; + } + } + + MS64 { + mulBy: (if d < 0 { + u64::wrapping_neg(q2 + 1) + } else { + q2 + 1 + }) as i64, + shiftBy: p - 64, + } +} + +#[cfg(test)] +mod tests { + use super::{magicU32, magicU64, magicS32, magicS64}; + use super::{MU32, MU64, MS32, MS64}; + + fn mkMU32(mulBy: u32, doAdd: bool, shiftBy: i32) -> MU32 { + MU32 { + mulBy, + doAdd, + shiftBy, + } + } + + fn mkMU64(mulBy: u64, doAdd: bool, shiftBy: i32) -> MU64 { + MU64 { + mulBy, + doAdd, + shiftBy, + } + } + + fn mkMS32(mulBy: i32, shiftBy: i32) -> MS32 { + MS32 { mulBy, shiftBy } + } + + fn mkMS64(mulBy: i64, shiftBy: i32) -> MS64 { + MS64 { mulBy, shiftBy } + } + + #[test] + fn test_magicU32() { + assert_eq!(magicU32(2u32), mkMU32(0x80000000u32, false, 0)); + assert_eq!(magicU32(3u32), mkMU32(0xaaaaaaabu32, false, 1)); + assert_eq!(magicU32(4u32), mkMU32(0x40000000u32, false, 0)); + assert_eq!(magicU32(5u32), mkMU32(0xcccccccdu32, false, 2)); + assert_eq!(magicU32(6u32), mkMU32(0xaaaaaaabu32, false, 2)); + assert_eq!(magicU32(7u32), mkMU32(0x24924925u32, true, 3)); + assert_eq!(magicU32(9u32), mkMU32(0x38e38e39u32, false, 1)); + assert_eq!(magicU32(10u32), mkMU32(0xcccccccdu32, false, 3)); + assert_eq!(magicU32(11u32), mkMU32(0xba2e8ba3u32, false, 3)); + assert_eq!(magicU32(12u32), mkMU32(0xaaaaaaabu32, false, 3)); + assert_eq!(magicU32(25u32), mkMU32(0x51eb851fu32, false, 3)); + assert_eq!(magicU32(125u32), 
mkMU32(0x10624dd3u32, false, 3)); + assert_eq!(magicU32(625u32), mkMU32(0xd1b71759u32, false, 9)); + assert_eq!(magicU32(1337u32), mkMU32(0x88233b2bu32, true, 11)); + assert_eq!(magicU32(65535u32), mkMU32(0x80008001u32, false, 15)); + assert_eq!(magicU32(65536u32), mkMU32(0x00010000u32, false, 0)); + assert_eq!(magicU32(65537u32), mkMU32(0xffff0001u32, false, 16)); + assert_eq!(magicU32(31415927u32), mkMU32(0x445b4553u32, false, 23)); + assert_eq!(magicU32(0xdeadbeefu32), mkMU32(0x93275ab3u32, false, 31)); + assert_eq!(magicU32(0xfffffffdu32), mkMU32(0x40000001u32, false, 30)); + assert_eq!(magicU32(0xfffffffeu32), mkMU32(0x00000003u32, true, 32)); + assert_eq!(magicU32(0xffffffffu32), mkMU32(0x80000001u32, false, 31)); + } + #[test] + fn test_magicU64() { + assert_eq!(magicU64(2u64), mkMU64(0x8000000000000000u64, false, 0)); + assert_eq!(magicU64(3u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 1)); + assert_eq!(magicU64(4u64), mkMU64(0x4000000000000000u64, false, 0)); + assert_eq!(magicU64(5u64), mkMU64(0xcccccccccccccccdu64, false, 2)); + assert_eq!(magicU64(6u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 2)); + assert_eq!(magicU64(7u64), mkMU64(0x2492492492492493u64, true, 3)); + assert_eq!(magicU64(9u64), mkMU64(0xe38e38e38e38e38fu64, false, 3)); + assert_eq!(magicU64(10u64), mkMU64(0xcccccccccccccccdu64, false, 3)); + assert_eq!(magicU64(11u64), mkMU64(0x2e8ba2e8ba2e8ba3u64, false, 1)); + assert_eq!(magicU64(12u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 3)); + assert_eq!(magicU64(25u64), mkMU64(0x47ae147ae147ae15u64, true, 5)); + assert_eq!(magicU64(125u64), mkMU64(0x0624dd2f1a9fbe77u64, true, 7)); + assert_eq!(magicU64(625u64), mkMU64(0x346dc5d63886594bu64, false, 7)); + assert_eq!(magicU64(1337u64), mkMU64(0xc4119d952866a139u64, false, 10)); + assert_eq!( + magicU64(31415927u64), + mkMU64(0x116d154b9c3d2f85u64, true, 25) + ); + assert_eq!( + magicU64(0x00000000deadbeefu64), + mkMU64(0x93275ab2dfc9094bu64, false, 31) + ); + assert_eq!( + 
magicU64(0x00000000fffffffdu64), + mkMU64(0x8000000180000005u64, false, 31) + ); + assert_eq!( + magicU64(0x00000000fffffffeu64), + mkMU64(0x0000000200000005u64, true, 32) + ); + assert_eq!( + magicU64(0x00000000ffffffffu64), + mkMU64(0x8000000080000001u64, false, 31) + ); + assert_eq!( + magicU64(0x0000000100000000u64), + mkMU64(0x0000000100000000u64, false, 0) + ); + assert_eq!( + magicU64(0x0000000100000001u64), + mkMU64(0xffffffff00000001u64, false, 32) + ); + assert_eq!( + magicU64(0x0ddc0ffeebadf00du64), + mkMU64(0x2788e9d394b77da1u64, true, 60) + ); + assert_eq!( + magicU64(0xfffffffffffffffdu64), + mkMU64(0x4000000000000001u64, false, 62) + ); + assert_eq!( + magicU64(0xfffffffffffffffeu64), + mkMU64(0x0000000000000003u64, true, 64) + ); + assert_eq!( + magicU64(0xffffffffffffffffu64), + mkMU64(0x8000000000000001u64, false, 63) + ); + } + #[test] + fn test_magicS32() { + assert_eq!(magicS32(-0x80000000i32), mkMS32(0x7fffffffu32 as i32, 30)); + assert_eq!(magicS32(-0x7FFFFFFFi32), mkMS32(0xbfffffffu32 as i32, 29)); + assert_eq!(magicS32(-0x7FFFFFFEi32), mkMS32(0x7ffffffdu32 as i32, 30)); + assert_eq!(magicS32(-31415927i32), mkMS32(0xbba4baadu32 as i32, 23)); + assert_eq!(magicS32(-1337i32), mkMS32(0x9df73135u32 as i32, 9)); + assert_eq!(magicS32(-256i32), mkMS32(0x7fffffffu32 as i32, 7)); + assert_eq!(magicS32(-5i32), mkMS32(0x99999999u32 as i32, 1)); + assert_eq!(magicS32(-3i32), mkMS32(0x55555555u32 as i32, 1)); + assert_eq!(magicS32(-2i32), mkMS32(0x7fffffffu32 as i32, 0)); + assert_eq!(magicS32(2i32), mkMS32(0x80000001u32 as i32, 0)); + assert_eq!(magicS32(3i32), mkMS32(0x55555556u32 as i32, 0)); + assert_eq!(magicS32(4i32), mkMS32(0x80000001u32 as i32, 1)); + assert_eq!(magicS32(5i32), mkMS32(0x66666667u32 as i32, 1)); + assert_eq!(magicS32(6i32), mkMS32(0x2aaaaaabu32 as i32, 0)); + assert_eq!(magicS32(7i32), mkMS32(0x92492493u32 as i32, 2)); + assert_eq!(magicS32(9i32), mkMS32(0x38e38e39u32 as i32, 1)); + assert_eq!(magicS32(10i32), 
mkMS32(0x66666667u32 as i32, 2)); + assert_eq!(magicS32(11i32), mkMS32(0x2e8ba2e9u32 as i32, 1)); + assert_eq!(magicS32(12i32), mkMS32(0x2aaaaaabu32 as i32, 1)); + assert_eq!(magicS32(25i32), mkMS32(0x51eb851fu32 as i32, 3)); + assert_eq!(magicS32(125i32), mkMS32(0x10624dd3u32 as i32, 3)); + assert_eq!(magicS32(625i32), mkMS32(0x68db8badu32 as i32, 8)); + assert_eq!(magicS32(1337i32), mkMS32(0x6208cecbu32 as i32, 9)); + assert_eq!(magicS32(31415927i32), mkMS32(0x445b4553u32 as i32, 23)); + assert_eq!(magicS32(0x7ffffffei32), mkMS32(0x80000003u32 as i32, 30)); + assert_eq!(magicS32(0x7fffffffi32), mkMS32(0x40000001u32 as i32, 29)); + } + #[test] + fn test_magicS64() { + assert_eq!( + magicS64(-0x8000000000000000i64), + mkMS64(0x7fffffffffffffffu64 as i64, 62) + ); + assert_eq!( + magicS64(-0x7FFFFFFFFFFFFFFFi64), + mkMS64(0xbfffffffffffffffu64 as i64, 61) + ); + assert_eq!( + magicS64(-0x7FFFFFFFFFFFFFFEi64), + mkMS64(0x7ffffffffffffffdu64 as i64, 62) + ); + assert_eq!( + magicS64(-0x0ddC0ffeeBadF00di64), + mkMS64(0x6c3b8b1635a4412fu64 as i64, 59) + ); + assert_eq!( + magicS64(-0x100000001i64), + mkMS64(0x800000007fffffffu64 as i64, 31) + ); + assert_eq!( + magicS64(-0x100000000i64), + mkMS64(0x7fffffffffffffffu64 as i64, 31) + ); + assert_eq!( + magicS64(-0xFFFFFFFFi64), + mkMS64(0x7fffffff7fffffffu64 as i64, 31) + ); + assert_eq!( + magicS64(-0xFFFFFFFEi64), + mkMS64(0x7ffffffefffffffdu64 as i64, 31) + ); + assert_eq!( + magicS64(-0xFFFFFFFDi64), + mkMS64(0x7ffffffe7ffffffbu64 as i64, 31) + ); + assert_eq!( + magicS64(-0xDeadBeefi64), + mkMS64(0x6cd8a54d2036f6b5u64 as i64, 31) + ); + assert_eq!( + magicS64(-31415927i64), + mkMS64(0x7749755a31e1683du64 as i64, 24) + ); + assert_eq!(magicS64(-1337i64), mkMS64(0x9df731356bccaf63u64 as i64, 9)); + assert_eq!(magicS64(-256i64), mkMS64(0x7fffffffffffffffu64 as i64, 7)); + assert_eq!(magicS64(-5i64), mkMS64(0x9999999999999999u64 as i64, 1)); + assert_eq!(magicS64(-3i64), mkMS64(0x5555555555555555u64 as i64, 1)); + 
assert_eq!(magicS64(-2i64), mkMS64(0x7fffffffffffffffu64 as i64, 0)); + assert_eq!(magicS64(2i64), mkMS64(0x8000000000000001u64 as i64, 0)); + assert_eq!(magicS64(3i64), mkMS64(0x5555555555555556u64 as i64, 0)); + assert_eq!(magicS64(4i64), mkMS64(0x8000000000000001u64 as i64, 1)); + assert_eq!(magicS64(5i64), mkMS64(0x6666666666666667u64 as i64, 1)); + assert_eq!(magicS64(6i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 0)); + assert_eq!(magicS64(7i64), mkMS64(0x4924924924924925u64 as i64, 1)); + assert_eq!(magicS64(9i64), mkMS64(0x1c71c71c71c71c72u64 as i64, 0)); + assert_eq!(magicS64(10i64), mkMS64(0x6666666666666667u64 as i64, 2)); + assert_eq!(magicS64(11i64), mkMS64(0x2e8ba2e8ba2e8ba3u64 as i64, 1)); + assert_eq!(magicS64(12i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 1)); + assert_eq!(magicS64(25i64), mkMS64(0xa3d70a3d70a3d70bu64 as i64, 4)); + assert_eq!(magicS64(125i64), mkMS64(0x20c49ba5e353f7cfu64 as i64, 4)); + assert_eq!(magicS64(625i64), mkMS64(0x346dc5d63886594bu64 as i64, 7)); + assert_eq!(magicS64(1337i64), mkMS64(0x6208ceca9433509du64 as i64, 9)); + assert_eq!( + magicS64(31415927i64), + mkMS64(0x88b68aa5ce1e97c3u64 as i64, 24) + ); + assert_eq!( + magicS64(0x00000000deadbeefi64), + mkMS64(0x93275ab2dfc9094bu64 as i64, 31) + ); + assert_eq!( + magicS64(0x00000000fffffffdi64), + mkMS64(0x8000000180000005u64 as i64, 31) + ); + assert_eq!( + magicS64(0x00000000fffffffei64), + mkMS64(0x8000000100000003u64 as i64, 31) + ); + assert_eq!( + magicS64(0x00000000ffffffffi64), + mkMS64(0x8000000080000001u64 as i64, 31) + ); + assert_eq!( + magicS64(0x0000000100000000i64), + mkMS64(0x8000000000000001u64 as i64, 31) + ); + assert_eq!( + magicS64(0x0000000100000001i64), + mkMS64(0x7fffffff80000001u64 as i64, 31) + ); + assert_eq!( + magicS64(0x0ddc0ffeebadf00di64), + mkMS64(0x93c474e9ca5bbed1u64 as i64, 59) + ); + assert_eq!( + magicS64(0x7ffffffffffffffdi64), + mkMS64(0x2000000000000001u64 as i64, 60) + ); + assert_eq!( + magicS64(0x7ffffffffffffffei64), + 
mkMS64(0x8000000000000003u64 as i64, 62) + ); + assert_eq!( + magicS64(0x7fffffffffffffffi64), + mkMS64(0x4000000000000001u64 as i64, 61) + ); + } + #[test] + fn test_magic_generators_dont_panic() { + // The point of this is to check that the magic number generators + // don't panic with integer wraparounds, especially at boundary + // cases for their arguments. The actual results are thrown away. + let mut total: u64 = 0; + println!("Testing UP magicU32"); + for x in 2..(200 * 1000u32) { + let m = magicU32(x); + total = total ^ (m.mulBy as u64); + total = total + (m.shiftBy as u64); + total = total - (if m.doAdd { 123 } else { 456 }); + } + println!("Testing DOWN magicU32"); + for x in 0..(200 * 1000u32) { + let m = magicU32(0xFFFF_FFFFu32 - x); + total = total ^ (m.mulBy as u64); + total = total + (m.shiftBy as u64); + total = total - (if m.doAdd { 123 } else { 456 }); + } + + println!("Testing UP magicU64"); + for x in 2..(200 * 1000u64) { + let m = magicU64(x); + total = total ^ m.mulBy; + total = total + (m.shiftBy as u64); + total = total - (if m.doAdd { 123 } else { 456 }); + } + println!("Testing DOWN magicU64"); + for x in 0..(200 * 1000u64) { + let m = magicU64(0xFFFF_FFFF_FFFF_FFFFu64 - x); + total = total ^ m.mulBy; + total = total + (m.shiftBy as u64); + total = total - (if m.doAdd { 123 } else { 456 }); + } + + println!("Testing UP magicS32"); + for x in 0..(200 * 1000i32) { + let m = magicS32(-0x8000_0000i32 + x); + total = total ^ (m.mulBy as u64); + total = total + (m.shiftBy as u64); + } + println!("Testing DOWN magicS32"); + for x in 0..(200 * 1000i32) { + let m = magicS32(0x7FFF_FFFFi32 - x); + total = total ^ (m.mulBy as u64); + total = total + (m.shiftBy as u64); + } + + println!("Testing UP magicS64"); + for x in 0..(200 * 1000i64) { + let m = magicS64(-0x8000_0000_0000_0000i64 + x); + total = total ^ (m.mulBy as u64); + total = total + (m.shiftBy as u64); + } + println!("Testing DOWN magicS64"); + for x in 0..(200 * 1000i64) { + let m = 
magicS64(0x7FFF_FFFF_FFFF_FFFFi64 - x); + total = total ^ (m.mulBy as u64); + total = total + (m.shiftBy as u64); + } + // Force `total` -- and hence, the entire computation -- to + // be used, so that rustc can't optimise it out. + assert_eq!(total, 7547519887532559585u64); + } +} diff --git a/lib/cretonne/src/dominator_tree.rs b/lib/cretonne/src/dominator_tree.rs index 6617085834..823469d081 100644 --- a/lib/cretonne/src/dominator_tree.rs +++ b/lib/cretonne/src/dominator_tree.rs @@ -197,7 +197,7 @@ impl DominatorTree { } } - assert_eq!(a.0, b.0, "Unreachable block passed to common_dominator?"); + debug_assert_eq!(a.0, b.0, "Unreachable block passed to common_dominator?"); // We're in the same EBB. The common dominator is the earlier instruction. if layout.cmp(a.1, b.1) == Ordering::Less { @@ -241,7 +241,7 @@ impl DominatorTree { pub fn clear(&mut self) { self.nodes.clear(); self.postorder.clear(); - assert!(self.stack.is_empty()); + debug_assert!(self.stack.is_empty()); self.valid = false; } @@ -340,7 +340,7 @@ impl DominatorTree { /// post-order except for the insertion of the new EBB header at the split point. fn push_successors(&mut self, func: &Function, ebb: Ebb) { for inst in func.layout.ebb_insts(ebb) { - match func.dfg[inst].analyze_branch(&func.dfg.value_lists) { + match func.dfg.analyze_branch(inst) { BranchInfo::SingleDest(succ, _) => { if self.nodes[succ].rpo_number == 0 { self.nodes[succ].rpo_number = SEEN; @@ -539,7 +539,7 @@ impl DominatorTreePreorder { /// Recompute this data structure to match `domtree`. pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) { self.nodes.clear(); - assert_eq!(self.stack.len(), 0); + debug_assert_eq!(self.stack.len(), 0); // Step 1: Populate the child and sibling links. // @@ -557,7 +557,7 @@ impl DominatorTreePreorder { } // Step 2. Assign pre-order numbers from a DFS of the dominator tree. 
- assert!(self.stack.len() <= 1); + debug_assert!(self.stack.len() <= 1); let mut n = 0; while let Some(ebb) = self.stack.pop() { n += 1; diff --git a/lib/cretonne/src/entity/list.rs b/lib/cretonne/src/entity/list.rs index 58c91d9287..ad03dc6c85 100644 --- a/lib/cretonne/src/entity/list.rs +++ b/lib/cretonne/src/entity/list.rs @@ -220,8 +220,8 @@ impl ListPool { to_sclass: SizeClass, elems_to_copy: usize, ) -> usize { - assert!(elems_to_copy <= sclass_size(from_sclass)); - assert!(elems_to_copy <= sclass_size(to_sclass)); + debug_assert!(elems_to_copy <= sclass_size(from_sclass)); + debug_assert!(elems_to_copy <= sclass_size(to_sclass)); let new_block = self.alloc(to_sclass); if elems_to_copy > 0 { @@ -302,7 +302,7 @@ impl EntityList { pub fn clear(&mut self, pool: &mut ListPool) { let idx = self.index as usize; match pool.len_of(self) { - None => assert_eq!(idx, 0, "Invalid pool"), + None => debug_assert_eq!(idx, 0, "Invalid pool"), Some(len) => pool.free(idx - 1, sclass_for_length(len)), } // Switch back to the empty list representation which has no storage. @@ -323,7 +323,7 @@ impl EntityList { match pool.len_of(self) { None => { // This is an empty list. Allocate a block and set length=1. - assert_eq!(idx, 0, "Invalid pool"); + debug_assert_eq!(idx, 0, "Invalid pool"); let block = pool.alloc(sclass_for_length(1)); pool.data[block] = T::new(1); pool.data[block + 1] = element; @@ -359,7 +359,7 @@ impl EntityList { match pool.len_of(self) { None => { // This is an empty list. Allocate a block. - assert_eq!(idx, 0, "Invalid pool"); + debug_assert_eq!(idx, 0, "Invalid pool"); if count == 0 { return &mut []; } @@ -410,7 +410,7 @@ impl EntityList { } tail[0] = element; } else { - assert_eq!(index, seq.len()); + debug_assert_eq!(index, seq.len()); } } @@ -420,7 +420,7 @@ impl EntityList { { let seq = self.as_mut_slice(pool); len = seq.len(); - assert!(index < len); + debug_assert!(index < len); // Copy elements down. 
for i in index..len - 1 { @@ -450,7 +450,7 @@ impl EntityList { /// the list. pub fn swap_remove(&mut self, index: usize, pool: &mut ListPool) { let len = self.len(pool); - assert!(index < len); + debug_assert!(index < len); if index == len - 1 { self.remove(index, pool); } else { diff --git a/lib/cretonne/src/entity/mod.rs b/lib/cretonne/src/entity/mod.rs index 3cc8ec6f6d..6457e47f56 100644 --- a/lib/cretonne/src/entity/mod.rs +++ b/lib/cretonne/src/entity/mod.rs @@ -61,7 +61,7 @@ macro_rules! entity_impl { ($entity:ident) => { impl $crate::entity::EntityRef for $entity { fn new(index: usize) -> Self { - assert!(index < (::std::u32::MAX as usize)); + debug_assert!(index < (::std::u32::MAX as usize)); $entity(index as u32) } diff --git a/lib/cretonne/src/entity/sparse.rs b/lib/cretonne/src/entity/sparse.rs index a4836cb9d5..488fd55393 100644 --- a/lib/cretonne/src/entity/sparse.rs +++ b/lib/cretonne/src/entity/sparse.rs @@ -150,7 +150,7 @@ where // There was no previous entry for `key`. Add it to the end of `dense`. 
let idx = self.dense.len(); - assert!(idx <= u32::MAX as usize, "SparseMap overflow"); + debug_assert!(idx <= u32::MAX as usize, "SparseMap overflow"); self.dense.push(value); self.sparse[key] = idx as u32; None diff --git a/lib/cretonne/src/flowgraph.rs b/lib/cretonne/src/flowgraph.rs index 9c0fa09f26..dce60162a4 100644 --- a/lib/cretonne/src/flowgraph.rs +++ b/lib/cretonne/src/flowgraph.rs @@ -108,7 +108,7 @@ impl ControlFlowGraph { fn compute_ebb(&mut self, func: &Function, ebb: Ebb) { for inst in func.layout.ebb_insts(ebb) { - match func.dfg[inst].analyze_branch(&func.dfg.value_lists) { + match func.dfg.analyze_branch(inst) { BranchInfo::SingleDest(dest, _) => { self.add_edge((ebb, inst), dest); } diff --git a/lib/cretonne/src/ir/dfg.rs b/lib/cretonne/src/ir/dfg.rs index 2f1cc8763b..344483a6d9 100644 --- a/lib/cretonne/src/ir/dfg.rs +++ b/lib/cretonne/src/ir/dfg.rs @@ -217,11 +217,11 @@ impl DataFlowGraph { /// /// The `dest` value can't be attached to an instruction or EBB. pub fn change_to_alias(&mut self, dest: Value, src: Value) { - assert!(!self.value_is_attached(dest)); + debug_assert!(!self.value_is_attached(dest)); // Try to create short alias chains by finding the original source value. // This also avoids the creation of loops. let original = self.resolve_aliases(src); - assert_ne!( + debug_assert_ne!( dest, original, "Aliasing {} to {} would create a loop", @@ -229,7 +229,7 @@ impl DataFlowGraph { src ); let ty = self.value_type(original); - assert_eq!( + debug_assert_eq!( self.value_type(dest), ty, "Aliasing {} to {} would change its type {} to {}", @@ -273,7 +273,7 @@ impl DataFlowGraph { { let original = src; let ty = self.value_type(original); - assert_eq!( + debug_assert_eq!( self.value_type(dest), ty, "Aliasing {} to {} would change its type {} to {}", @@ -498,9 +498,9 @@ impl DataFlowGraph { /// This is a very low-level operation. Usually, instruction results with the correct types are /// created automatically. 
The `res` value must not be attached to anything else. pub fn attach_result(&mut self, inst: Inst, res: Value) { - assert!(!self.value_is_attached(res)); + debug_assert!(!self.value_is_attached(res)); let num = self.results[inst].push(res, &mut self.value_lists); - assert!(num <= u16::MAX as usize, "Too many result values"); + debug_assert!(num <= u16::MAX as usize, "Too many result values"); let ty = self.value_type(res); self.values[res] = ValueData::Inst { ty, @@ -533,7 +533,7 @@ impl DataFlowGraph { .expect("Replacing detached result"), new_value, ); - assert_eq!( + debug_assert_eq!( attached, old_value, "{} wasn't detached from {}", @@ -547,7 +547,7 @@ impl DataFlowGraph { pub fn append_result(&mut self, inst: Inst, ty: Type) -> Value { let res = self.values.next_key(); let num = self.results[inst].push(res, &mut self.value_lists); - assert!(num <= u16::MAX as usize, "Too many result values"); + debug_assert!(num <= u16::MAX as usize, "Too many result values"); self.make_value(ValueData::Inst { ty, inst, @@ -684,7 +684,7 @@ impl DataFlowGraph { pub fn append_ebb_param(&mut self, ebb: Ebb, ty: Type) -> Value { let param = self.values.next_key(); let num = self.ebbs[ebb].params.push(param, &mut self.value_lists); - assert!(num <= u16::MAX as usize, "Too many parameters on EBB"); + debug_assert!(num <= u16::MAX as usize, "Too many parameters on EBB"); self.make_value(ValueData::Param { ty, num: num as u16, @@ -761,9 +761,9 @@ impl DataFlowGraph { /// /// In almost all cases, you should be using `append_ebb_param()` instead of this method. 
pub fn attach_ebb_param(&mut self, ebb: Ebb, param: Value) { - assert!(!self.value_is_attached(param)); + debug_assert!(!self.value_is_attached(param)); let num = self.ebbs[ebb].params.push(param, &mut self.value_lists); - assert!(num <= u16::MAX as usize, "Too many parameters on EBB"); + debug_assert!(num <= u16::MAX as usize, "Too many parameters on EBB"); let ty = self.value_type(param); self.values[param] = ValueData::Param { ty, @@ -859,7 +859,7 @@ impl DataFlowGraph { /// to create invalid values for index padding which may be reassigned later. #[cold] fn set_value_type_for_parser(&mut self, v: Value, t: Type) { - debug_assert!( + assert!( self.value_type(v) == types::VOID, "this function is only for assigning types to previously invalid values" ); @@ -882,7 +882,7 @@ impl DataFlowGraph { ) -> usize { // Get the call signature if this is a function call. if let Some(sig) = self.call_signature(inst) { - debug_assert_eq!(self.insts[inst].opcode().constraints().fixed_results(), 0); + assert_eq!(self.insts[inst].opcode().constraints().fixed_results(), 0); for res_idx in 0..self.signatures[sig].returns.len() { let ty = self.signatures[sig].returns[res_idx].value_type; if let Some(v) = reuse.get(res_idx) { diff --git a/lib/cretonne/src/ir/immediates.rs b/lib/cretonne/src/ir/immediates.rs index 0dca3144cb..2e6339cc01 100644 --- a/lib/cretonne/src/ir/immediates.rs +++ b/lib/cretonne/src/ir/immediates.rs @@ -490,7 +490,7 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result { significand <<= adjust; exponent -= i32::from(adjust); } - assert_eq!(significand >> t, 1); + debug_assert_eq!(significand >> t, 1); // Trailing significand excludes the high bit. let t_bits = significand & ((1 << t) - 1); @@ -538,6 +538,17 @@ impl Ieee32 { Ieee32(exponent << t) } + /// Create an `Ieee32` number representing the greatest negative value + /// not convertable from f32 to a signed integer with width n. 
+ pub fn fcvt_to_sint_negative_overflow>(n: I) -> Ieee32 { + let n = n.into(); + debug_assert!(n < 32); + debug_assert!(23 + 1 - n < 32); + Self::with_bits( + (1u32 << (32 - 1)) | Self::pow2(n - 1).0 | (1u32 << (23 + 1 - n)), + ) + } + /// Return self negated. pub fn neg(self) -> Ieee32 { Ieee32(self.0 ^ (1 << 31)) @@ -590,6 +601,17 @@ impl Ieee64 { Ieee64(exponent << t) } + /// Create an `Ieee64` number representing the greatest negative value + /// not convertable from f64 to a signed integer with width n. + pub fn fcvt_to_sint_negative_overflow>(n: I) -> Ieee64 { + let n = n.into(); + debug_assert!(n < 64); + debug_assert!(52 + 1 - n < 64); + Self::with_bits( + (1u64 << (64 - 1)) | Self::pow2(n - 1).0 | (1u64 << (52 + 1 - n)), + ) + } + /// Return self negated. pub fn neg(self) -> Ieee64 { Ieee64(self.0 ^ (1 << 63)) @@ -858,6 +880,15 @@ mod tests { assert_eq!(Ieee32::pow2(1).neg().to_string(), "-0x1.000000p1"); } + #[test] + fn fcvt_to_sint_negative_overflow_ieee32() { + for n in &[8, 16] { + assert_eq!(-((1u32 << (n - 1)) as f32) - 1.0, unsafe { + mem::transmute(Ieee32::fcvt_to_sint_negative_overflow(*n)) + }); + } + } + #[test] fn format_ieee64() { assert_eq!(Ieee64::with_float(0.0).to_string(), "0.0"); @@ -986,4 +1017,13 @@ mod tests { assert_eq!(Ieee64::pow2(1).neg().to_string(), "-0x1.0000000000000p1"); } + + #[test] + fn fcvt_to_sint_negative_overflow_ieee64() { + for n in &[8, 16, 32] { + assert_eq!(-((1u64 << (n - 1)) as f64) - 1.0, unsafe { + mem::transmute(Ieee64::fcvt_to_sint_negative_overflow(*n)) + }); + } + } } diff --git a/lib/cretonne/src/ir/instructions.rs b/lib/cretonne/src/ir/instructions.rs index 22800ac634..b4b82c4ea6 100644 --- a/lib/cretonne/src/ir/instructions.rs +++ b/lib/cretonne/src/ir/instructions.rs @@ -561,7 +561,7 @@ impl OpcodeConstraints { /// Get the value type of result number `n`, having resolved the controlling type variable to /// `ctrl_type`. 
pub fn result_type(self, n: usize, ctrl_type: Type) -> Type { - assert!(n < self.fixed_results(), "Invalid result index"); + debug_assert!(n < self.fixed_results(), "Invalid result index"); if let ResolvedConstraint::Bound(t) = OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type) { @@ -577,7 +577,7 @@ impl OpcodeConstraints { /// Unlike results, it is possible for some input values to vary freely within a specific /// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant. pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint { - assert!( + debug_assert!( n < self.fixed_value_arguments(), "Invalid value argument index" ); diff --git a/lib/cretonne/src/ir/layout.rs b/lib/cretonne/src/ir/layout.rs index 1fc1c048fb..0e6e010e7c 100644 --- a/lib/cretonne/src/ir/layout.rs +++ b/lib/cretonne/src/ir/layout.rs @@ -88,7 +88,7 @@ const LOCAL_LIMIT: SequenceNumber = 100 * MINOR_STRIDE; // Compute the midpoint between `a` and `b`. // Return `None` if the midpoint would be equal to either. fn midpoint(a: SequenceNumber, b: SequenceNumber) -> Option { - assert!(a < b); + debug_assert!(a < b); // Avoid integer overflow. let m = a + (b - a) / 2; if m > a { Some(m) } else { None } @@ -148,7 +148,7 @@ impl Layout { /// Assign a valid sequence number to `ebb` such that the numbers are still monotonic. This may /// require renumbering. fn assign_ebb_seq(&mut self, ebb: Ebb) { - assert!(self.is_ebb_inserted(ebb)); + debug_assert!(self.is_ebb_inserted(ebb)); // Get the sequence number immediately before `ebb`, or 0. let prev_seq = self.ebbs[ebb] @@ -334,13 +334,13 @@ impl Layout { /// Insert `ebb` as the last EBB in the layout. 
pub fn append_ebb(&mut self, ebb: Ebb) { - assert!( + debug_assert!( !self.is_ebb_inserted(ebb), "Cannot append EBB that is already in the layout" ); { let node = &mut self.ebbs[ebb]; - assert!(node.first_inst.is_none() && node.last_inst.is_none()); + debug_assert!(node.first_inst.is_none() && node.last_inst.is_none()); node.prev = self.last_ebb.into(); node.next = None.into(); } @@ -355,11 +355,11 @@ impl Layout { /// Insert `ebb` in the layout before the existing EBB `before`. pub fn insert_ebb(&mut self, ebb: Ebb, before: Ebb) { - assert!( + debug_assert!( !self.is_ebb_inserted(ebb), "Cannot insert EBB that is already in the layout" ); - assert!( + debug_assert!( self.is_ebb_inserted(before), "EBB Insertion point not in the layout" ); @@ -379,11 +379,11 @@ impl Layout { /// Insert `ebb` in the layout *after* the existing EBB `after`. pub fn insert_ebb_after(&mut self, ebb: Ebb, after: Ebb) { - assert!( + debug_assert!( !self.is_ebb_inserted(ebb), "Cannot insert EBB that is already in the layout" ); - assert!( + debug_assert!( self.is_ebb_inserted(after), "EBB Insertion point not in the layout" ); @@ -403,8 +403,8 @@ impl Layout { /// Remove `ebb` from the layout. pub fn remove_ebb(&mut self, ebb: Ebb) { - assert!(self.is_ebb_inserted(ebb), "EBB not in the layout"); - assert!(self.first_inst(ebb).is_none(), "EBB must be empty."); + debug_assert!(self.is_ebb_inserted(ebb), "EBB not in the layout"); + debug_assert!(self.first_inst(ebb).is_none(), "EBB must be empty."); // Clear the `ebb` node and extract links. let prev; @@ -521,8 +521,8 @@ impl Layout { /// Append `inst` to the end of `ebb`. 
pub fn append_inst(&mut self, inst: Inst, ebb: Ebb) { - assert_eq!(self.inst_ebb(inst), None); - assert!( + debug_assert_eq!(self.inst_ebb(inst), None); + debug_assert!( self.is_ebb_inserted(ebb), "Cannot append instructions to EBB not in layout" ); @@ -532,7 +532,7 @@ impl Layout { let inst_node = &mut self.insts[inst]; inst_node.ebb = ebb.into(); inst_node.prev = ebb_node.last_inst; - assert!(inst_node.next.is_none()); + debug_assert!(inst_node.next.is_none()); } if ebb_node.first_inst.is_none() { ebb_node.first_inst = inst.into(); @@ -566,7 +566,7 @@ impl Layout { /// Insert `inst` before the instruction `before` in the same EBB. pub fn insert_inst(&mut self, inst: Inst, before: Inst) { - assert_eq!(self.inst_ebb(inst), None); + debug_assert_eq!(self.inst_ebb(inst), None); let ebb = self.inst_ebb(before).expect( "Instruction before insertion point not in the layout", ); @@ -645,7 +645,7 @@ impl Layout { let old_ebb = self.inst_ebb(before).expect( "The `before` instruction must be in the layout", ); - assert!(!self.is_ebb_inserted(new_ebb)); + debug_assert!(!self.is_ebb_inserted(new_ebb)); // Insert new_ebb after old_ebb. 
let next_ebb = self.ebbs[old_ebb].next; diff --git a/lib/cretonne/src/ir/progpoint.rs b/lib/cretonne/src/ir/progpoint.rs index 72a00602ee..ce5b108e54 100644 --- a/lib/cretonne/src/ir/progpoint.rs +++ b/lib/cretonne/src/ir/progpoint.rs @@ -19,7 +19,7 @@ pub struct ProgramPoint(u32); impl From for ProgramPoint { fn from(inst: Inst) -> ProgramPoint { let idx = inst.index(); - assert!(idx < (u32::MAX / 2) as usize); + debug_assert!(idx < (u32::MAX / 2) as usize); ProgramPoint((idx * 2) as u32) } } @@ -27,7 +27,7 @@ impl From for ProgramPoint { impl From for ProgramPoint { fn from(ebb: Ebb) -> ProgramPoint { let idx = ebb.index(); - assert!(idx < (u32::MAX / 2) as usize); + debug_assert!(idx < (u32::MAX / 2) as usize); ProgramPoint((idx * 2 + 1) as u32) } } diff --git a/lib/cretonne/src/ir/stackslot.rs b/lib/cretonne/src/ir/stackslot.rs index 7baadde636..5f73fb4b37 100644 --- a/lib/cretonne/src/ir/stackslot.rs +++ b/lib/cretonne/src/ir/stackslot.rs @@ -41,9 +41,9 @@ pub enum StackSlotKind { /// A spill slot. This is a stack slot created by the register allocator. SpillSlot, - /// A local variable. This is a chunk of local stack memory for use by the `stack_load` and - /// `stack_store` instructions. - Local, + /// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load` + /// and `stack_store` instructions. + ExplicitSlot, /// An incoming function argument. 
/// @@ -72,7 +72,7 @@ impl FromStr for StackSlotKind { fn from_str(s: &str) -> Result { use self::StackSlotKind::*; match s { - "local" => Ok(Local), + "explicit_slot" => Ok(ExplicitSlot), "spill_slot" => Ok(SpillSlot), "incoming_arg" => Ok(IncomingArg), "outgoing_arg" => Ok(OutgoingArg), @@ -86,7 +86,7 @@ impl fmt::Display for StackSlotKind { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::StackSlotKind::*; f.write_str(match *self { - Local => "local", + ExplicitSlot => "explicit_slot", SpillSlot => "spill_slot", IncomingArg => "incoming_arg", OutgoingArg => "outgoing_arg", @@ -112,7 +112,7 @@ pub struct StackSlotData { /// /// For `OutgoingArg` stack slots, the offset is relative to the current function's stack /// pointer immediately before the call. - pub offset: StackOffset, + pub offset: Option, } impl StackSlotData { @@ -121,7 +121,7 @@ impl StackSlotData { StackSlotData { kind, size, - offset: 0, + offset: None, } } @@ -139,8 +139,8 @@ impl StackSlotData { impl fmt::Display for StackSlotData { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {}", self.kind, self.size)?; - if self.offset != 0 { - write!(f, ", offset {}", self.offset)?; + if let Some(offset) = self.offset { + write!(f, ", offset {}", offset)?; } Ok(()) } @@ -205,7 +205,7 @@ impl StackSlots { /// Set the offset of a stack slot. pub fn set_offset(&mut self, ss: StackSlot, offset: StackOffset) { - self.slots[ss].offset = offset; + self.slots[ss].offset = Some(offset); } /// Get an iterator over all the stack slot keys. @@ -245,8 +245,8 @@ impl StackSlots { /// Create a stack slot representing an incoming function argument. 
pub fn make_incoming_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot { let mut data = StackSlotData::new(StackSlotKind::IncomingArg, ty.bytes()); - assert!(offset <= StackOffset::max_value() - data.size as StackOffset); - data.offset = offset; + debug_assert!(offset <= StackOffset::max_value() - data.size as StackOffset); + data.offset = Some(offset); self.push(data) } @@ -262,7 +262,7 @@ impl StackSlots { // Look for an existing outgoing stack slot with the same offset and size. let inspos = match self.outgoing.binary_search_by_key(&(offset, size), |&ss| { - (self[ss].offset, self[ss].size) + (self[ss].offset.unwrap(), self[ss].size) }) { Ok(idx) => return self.outgoing[idx], Err(idx) => idx, @@ -270,8 +270,8 @@ impl StackSlots { // No existing slot found. Make one and insert it into `outgoing`. let mut data = StackSlotData::new(StackSlotKind::OutgoingArg, size); - assert!(offset <= StackOffset::max_value() - size as StackOffset); - data.offset = offset; + debug_assert!(offset <= StackOffset::max_value() - size as StackOffset); + data.offset = Some(offset); let ss = self.slots.push(data); self.outgoing.insert(inspos, ss); ss @@ -346,13 +346,13 @@ mod tests { let ss1 = sss.get_outgoing_arg(types::I32, 4); let ss2 = sss.get_outgoing_arg(types::I64, 8); - assert_eq!(sss[ss0].offset, 8); + assert_eq!(sss[ss0].offset, Some(8)); assert_eq!(sss[ss0].size, 4); - assert_eq!(sss[ss1].offset, 4); + assert_eq!(sss[ss1].offset, Some(4)); assert_eq!(sss[ss1].size, 4); - assert_eq!(sss[ss2].offset, 8); + assert_eq!(sss[ss2].offset, Some(8)); assert_eq!(sss[ss2].size, 8); assert_eq!(sss.get_outgoing_arg(types::I32, 8), ss0); @@ -368,7 +368,7 @@ mod tests { assert_eq!(slot.alignment(8), 8); assert_eq!(slot.alignment(16), 8); - let slot2 = StackSlotData::new(StackSlotKind::Local, 24); + let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24); assert_eq!(slot2.alignment(4), 4); assert_eq!(slot2.alignment(8), 8); diff --git a/lib/cretonne/src/isa/arm32/settings.rs 
b/lib/cretonne/src/isa/arm32/settings.rs index e857716a64..b502deee40 100644 --- a/lib/cretonne/src/isa/arm32/settings.rs +++ b/lib/cretonne/src/isa/arm32/settings.rs @@ -5,5 +5,5 @@ use std::fmt; // Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public // `Flags` struct with an impl for all of the settings defined in -// `lib/cretonne/meta/cretonne/settings.py`. +// `lib/cretonne/meta/isa/arm32/settings.py`. include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs")); diff --git a/lib/cretonne/src/isa/arm64/settings.rs b/lib/cretonne/src/isa/arm64/settings.rs index 6427d7be99..b575168361 100644 --- a/lib/cretonne/src/isa/arm64/settings.rs +++ b/lib/cretonne/src/isa/arm64/settings.rs @@ -5,5 +5,5 @@ use std::fmt; // Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public // `Flags` struct with an impl for all of the settings defined in -// `lib/cretonne/meta/cretonne/settings.py`. +// `lib/cretonne/meta/isa/arm64/settings.py`. include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs")); diff --git a/lib/cretonne/src/isa/constraints.rs b/lib/cretonne/src/isa/constraints.rs index eb6b33c3cb..f831c0e187 100644 --- a/lib/cretonne/src/isa/constraints.rs +++ b/lib/cretonne/src/isa/constraints.rs @@ -13,6 +13,7 @@ use ir::{Function, ValueLoc, Inst}; use regalloc::RegDiversions; /// Register constraint for a single value operand or instruction result. +#[derive(PartialEq, Debug)] pub struct OperandConstraint { /// The kind of constraint. pub kind: ConstraintKind, @@ -53,7 +54,7 @@ impl OperandConstraint { } /// The different kinds of operand constraints. -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum ConstraintKind { /// This operand or result must be a register from the given register class. Reg, @@ -89,7 +90,7 @@ pub enum ConstraintKind { } /// Value operand constraints for an encoding recipe. 
-#[derive(Clone)] +#[derive(PartialEq, Clone)] pub struct RecipeConstraints { /// Constraints for the instruction's fixed value operands. /// @@ -160,7 +161,7 @@ impl RecipeConstraints { /// - Intel uses the address of the instruction following the branch, `origin = 2` for a 2-byte /// branch instruction. /// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub struct BranchRange { /// Offset in bytes from the address of the branch instruction to the origin used for computing /// the branch displacement. This is the destination of a branch that encodes a 0 displacement. diff --git a/lib/cretonne/src/isa/enc_tables.rs b/lib/cretonne/src/isa/enc_tables.rs index 5154138c1b..7453b7c044 100644 --- a/lib/cretonne/src/isa/enc_tables.rs +++ b/lib/cretonne/src/isa/enc_tables.rs @@ -225,7 +225,7 @@ impl<'a> Encodings<'a> { self.legalize_actions[self.legalize as usize] } - /// Check if the `rpred` recipe predicate s satisfied. + /// Check if the `rpred` recipe predicate is satisfied. fn check_recipe(&self, rpred: RecipePredicate) -> bool { match rpred { Some(p) => p(self.isa_preds, self.inst), diff --git a/lib/cretonne/src/isa/intel/abi.rs b/lib/cretonne/src/isa/intel/abi.rs index 21442250c5..0a14566b50 100644 --- a/lib/cretonne/src/isa/intel/abi.rs +++ b/lib/cretonne/src/isa/intel/abi.rs @@ -107,7 +107,7 @@ impl ArgAssigner for Args { // Assign a stack location. let loc = ArgumentLoc::Stack(self.offset as i32); self.offset += self.pointer_bytes; - assert!(self.offset <= i32::MAX as u32); + debug_assert!(self.offset <= i32::MAX as u32); loc.into() } } @@ -180,15 +180,13 @@ pub fn spiderwasm_prologue_epilogue( func: &mut ir::Function, isa: &TargetIsa, ) -> result::CtonResult { - let (word_size, stack_align) = if isa.flags().is_64bit() { - (8, 16) - } else { - (4, 4) - }; + // Spiderwasm on 32-bit x86 always aligns its stack pointer to 16 bytes. 
+ let stack_align = 16; + let word_size = if isa.flags().is_64bit() { 8 } else { 4 }; let bytes = StackSize::from(isa.flags().spiderwasm_prologue_words()) * word_size; let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); - ss.offset = -(bytes as StackOffset); + ss.offset = Some(-(bytes as StackOffset)); func.stack_slots.push(ss); layout_stack(&mut func.stack_slots, stack_align)?; @@ -197,11 +195,10 @@ pub fn spiderwasm_prologue_epilogue( /// Insert a System V-compatible prologue and epilogue. pub fn native_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult { - let (word_size, stack_align) = if isa.flags().is_64bit() { - (8, 16) - } else { - (4, 4) - }; + // The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but + // newer versions use a 16-byte aligned stack pointer. + let stack_align = 16; + let word_size = if isa.flags().is_64bit() { 8 } else { 4 }; let csr_type = if isa.flags().is_64bit() { ir::types::I64 } else { @@ -220,11 +217,11 @@ pub fn native_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> res func.create_stack_slot(ir::StackSlotData { kind: ir::StackSlotKind::IncomingArg, size: csr_stack_size as u32, - offset: -csr_stack_size, + offset: Some(-csr_stack_size), }); let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32; - let local_stack_size = (total_stack_size - csr_stack_size) as i64; + let local_stack_size = i64::from(total_stack_size - csr_stack_size); // Add CSRs to function signature let fp_arg = ir::AbiParam::special_reg( diff --git a/lib/cretonne/src/isa/intel/enc_tables.rs b/lib/cretonne/src/isa/intel/enc_tables.rs index 1a602cf8a1..d3f62adf7c 100644 --- a/lib/cretonne/src/isa/intel/enc_tables.rs +++ b/lib/cretonne/src/isa/intel/enc_tables.rs @@ -1,5 +1,6 @@ //! Encoding tables for Intel ISAs. 
+use bitset::BitSet; use cursor::{Cursor, FuncCursor}; use flowgraph::ControlFlowGraph; use ir::{self, InstBuilder}; @@ -375,13 +376,22 @@ fn expand_fcvt_to_sint( let mut overflow_cc = FloatCC::LessThan; let output_bits = ty.lane_bits(); let flimit = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::pow2(output_bits - 1).neg()), + // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. + ir::types::F32 => { + pos.ins().f32const(if output_bits < 32 { + overflow_cc = FloatCC::LessThanOrEqual; + Ieee32::fcvt_to_sint_negative_overflow(output_bits) + } else { + Ieee32::pow2(output_bits - 1).neg() + }) + } ir::types::F64 => { // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so // there are values less than -2^(N-1) that convert correctly to INT_MIN. pos.ins().f64const(if output_bits < 64 { overflow_cc = FloatCC::LessThanOrEqual; - Ieee64::with_float(-((1u64 << (output_bits - 1)) as f64) - 1.0) + Ieee64::fcvt_to_sint_negative_overflow(output_bits) } else { Ieee64::pow2(output_bits - 1).neg() }) @@ -393,8 +403,8 @@ fn expand_fcvt_to_sint( // Finally, we could have a positive value that is too large. let fzero = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::with_float(0.0)), - ir::types::F64 => pos.ins().f64const(Ieee64::with_float(0.0)), + ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), + ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), _ => panic!("Can't convert {}", xty), }; let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); diff --git a/lib/cretonne/src/isa/intel/settings.rs b/lib/cretonne/src/isa/intel/settings.rs index f933e91b87..147af4c2fa 100644 --- a/lib/cretonne/src/isa/intel/settings.rs +++ b/lib/cretonne/src/isa/intel/settings.rs @@ -5,7 +5,7 @@ use std::fmt; // Include code generated by `lib/cretonne/meta/gen_settings.py`. 
This file contains a public // `Flags` struct with an impl for all of the settings defined in -// `lib/cretonne/meta/cretonne/settings.py`. +// `lib/cretonne/meta/isa/intel/settings.py`. include!(concat!(env!("OUT_DIR"), "/settings-intel.rs")); #[cfg(test)] diff --git a/lib/cretonne/src/isa/mod.rs b/lib/cretonne/src/isa/mod.rs index 7e10f12620..aefe93a05d 100644 --- a/lib/cretonne/src/isa/mod.rs +++ b/lib/cretonne/src/isa/mod.rs @@ -252,7 +252,7 @@ pub trait TargetIsa: fmt::Display { if func.signature.call_conv == ir::CallConv::SpiderWASM { let bytes = StackSize::from(self.flags().spiderwasm_prologue_words()) * word_size; let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); - ss.offset = -(bytes as StackOffset); + ss.offset = Some(-(bytes as StackOffset)); func.stack_slots.push(ss); } diff --git a/lib/cretonne/src/isa/riscv/abi.rs b/lib/cretonne/src/isa/riscv/abi.rs index 52e715be43..1db377b0ef 100644 --- a/lib/cretonne/src/isa/riscv/abi.rs +++ b/lib/cretonne/src/isa/riscv/abi.rs @@ -80,7 +80,7 @@ impl ArgAssigner for Args { // Assign a stack location. 
let loc = ArgumentLoc::Stack(self.offset as i32); self.offset += self.pointer_bytes; - assert!(self.offset <= i32::MAX as u32); + debug_assert!(self.offset <= i32::MAX as u32); loc.into() } } diff --git a/lib/cretonne/src/isa/riscv/binemit.rs b/lib/cretonne/src/isa/riscv/binemit.rs index c20b1ca004..8cc1ef17d1 100644 --- a/lib/cretonne/src/isa/riscv/binemit.rs +++ b/lib/cretonne/src/isa/riscv/binemit.rs @@ -106,7 +106,7 @@ fn put_i(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, /// /// Encoding bits: `opcode[6:2] | (funct3 << 5)` fn put_u(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) { - let bits = bits as u32; + let bits = u32::from(bits); let opcode5 = bits & 0x1f; let rd = u32::from(rd) & 0x1f; @@ -133,7 +133,7 @@ fn put_sb(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit let rs1 = u32::from(rs1) & 0x1f; let rs2 = u32::from(rs2) & 0x1f; - assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm); + debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm); let imm = imm as u32; // 0-6: opcode @@ -164,7 +164,7 @@ fn put_uj(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS let opcode5 = bits & 0x1f; let rd = u32::from(rd) & 0x1f; - assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm); + debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm); let imm = imm as u32; // 0-6: opcode diff --git a/lib/cretonne/src/isa/riscv/registers.py b/lib/cretonne/src/isa/riscv/registers.py deleted file mode 100644 index 8b13789179..0000000000 --- a/lib/cretonne/src/isa/riscv/registers.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/cretonne/src/isa/riscv/settings.rs b/lib/cretonne/src/isa/riscv/settings.rs index 7b609acf20..2f0f6822a9 100644 --- a/lib/cretonne/src/isa/riscv/settings.rs +++ b/lib/cretonne/src/isa/riscv/settings.rs @@ -5,7 +5,7 @@ use std::fmt; // Include code generated by `lib/cretonne/meta/gen_settings.py`. 
This file contains a public // `Flags` struct with an impl for all of the settings defined in -// `lib/cretonne/meta/cretonne/settings.py`. +// `lib/cretonne/meta/isa/riscv/settings.py`. include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs")); #[cfg(test)] diff --git a/lib/cretonne/src/isa/stack.rs b/lib/cretonne/src/isa/stack.rs index 730db28e18..e8a9b6981b 100644 --- a/lib/cretonne/src/isa/stack.rs +++ b/lib/cretonne/src/isa/stack.rs @@ -41,12 +41,12 @@ impl StackRef { let slot = &frame[ss]; let offset = if slot.kind == StackSlotKind::OutgoingArg { // Outgoing argument slots have offsets relative to our stack pointer. - slot.offset + slot.offset.unwrap() } else { // All other slots have offsets relative to our caller's stack frame. // Offset where SP is pointing. (All ISAs have stacks growing downwards.) let sp_offset = -(size as StackOffset); - slot.offset - sp_offset + slot.offset.unwrap() - sp_offset }; StackRef { base: StackBase::SP, diff --git a/lib/cretonne/src/legalizer/boundary.rs b/lib/cretonne/src/legalizer/boundary.rs index 5d9dd71dc0..0c5479650c 100644 --- a/lib/cretonne/src/legalizer/boundary.rs +++ b/lib/cretonne/src/legalizer/boundary.rs @@ -86,15 +86,15 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) { ArgumentPurpose::FramePointer => {} ArgumentPurpose::CalleeSaved => {} ArgumentPurpose::StructReturn => { - assert!(!has_sret, "Multiple sret arguments found"); + debug_assert!(!has_sret, "Multiple sret arguments found"); has_sret = true; } ArgumentPurpose::VMContext => { - assert!(!has_vmctx, "Multiple vmctx arguments found"); + debug_assert!(!has_vmctx, "Multiple vmctx arguments found"); has_vmctx = true; } ArgumentPurpose::SignatureId => { - assert!(!has_sigid, "Multiple sigid arguments found"); + debug_assert!(!has_sigid, "Multiple sigid arguments found"); has_sigid = true; } _ => panic!("Unexpected special-purpose arg {}", abi_type), @@ -104,7 +104,7 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) { // Compute the 
value we want for `arg` from the legalized ABI parameters. let mut get_arg = |func: &mut Function, ty| { let abi_type = func.signature.params[abi_arg]; - assert_eq!( + debug_assert_eq!( abi_type.purpose, ArgumentPurpose::Normal, "Can't legalize special-purpose argument" @@ -119,7 +119,7 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) { let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg); // The old `arg` is no longer an attached EBB argument, but there are probably still // uses of the value. - assert_eq!(pos.func.dfg.resolve_aliases(arg), converted); + debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted); } } @@ -139,19 +139,19 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) { } // These can be meaningfully added by `legalize_signature()`. ArgumentPurpose::Link => { - assert!(!has_link, "Multiple link parameters found"); + debug_assert!(!has_link, "Multiple link parameters found"); has_link = true; } ArgumentPurpose::StructReturn => { - assert!(!has_sret, "Multiple sret parameters found"); + debug_assert!(!has_sret, "Multiple sret parameters found"); has_sret = true; } ArgumentPurpose::VMContext => { - assert!(!has_vmctx, "Multiple vmctx parameters found"); + debug_assert!(!has_vmctx, "Multiple vmctx parameters found"); has_vmctx = true; } ArgumentPurpose::SignatureId => { - assert!(!has_sigid, "Multiple sigid parameters found"); + debug_assert!(!has_sigid, "Multiple sigid parameters found"); has_sigid = true; } } @@ -181,7 +181,7 @@ where // We theoretically allow for call instructions that return a number of fixed results before // the call return values. In practice, it doesn't happen. 
let fixed_results = pos.func.dfg[call].opcode().constraints().fixed_results(); - assert_eq!(fixed_results, 0, "Fixed results on calls not supported"); + debug_assert_eq!(fixed_results, 0, "Fixed results on calls not supported"); let results = pos.func.dfg.detach_results(call); let mut next_res = 0; @@ -210,7 +210,7 @@ where } }; let v = convert_from_abi(pos, res_type, Some(res), &mut get_res); - assert_eq!(pos.func.dfg.resolve_aliases(res), v); + debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v); } } @@ -239,7 +239,7 @@ where let arg_type = match get_arg(pos.func, ty) { Ok(v) => { debug_assert_eq!(pos.func.dfg.value_type(v), ty); - assert_eq!(into_result, None); + debug_assert_eq!(into_result, None); return v; } Err(t) => t, @@ -275,7 +275,7 @@ where } // Construct a `ty` by bit-casting from an integer type. ValueConversion::IntBits => { - assert!(!ty.is_int()); + debug_assert!(!ty.is_int()); let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion"); let arg = convert_from_abi(pos, abi_ty, None, get_arg); pos.ins().with_results([into_result]).bitcast(ty, arg) @@ -341,7 +341,7 @@ fn convert_to_abi( convert_to_abi(pos, cfg, hi, put_arg); } ValueConversion::IntBits => { - assert!(!ty.is_int()); + debug_assert!(!ty.is_int()); let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion"); let arg = pos.ins().bitcast(abi_ty, value); convert_to_abi(pos, cfg, arg, put_arg); @@ -556,7 +556,7 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { func.signature.returns[abi_arg] }); - assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args); + debug_assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args); // Append special return arguments for any `sret`, `link`, and `vmctx` return values added to // the legalized signature. 
These values should simply be propagated from the entry block diff --git a/lib/cretonne/src/legalizer/globalvar.rs b/lib/cretonne/src/legalizer/globalvar.rs index b945923f2f..5f6427822b 100644 --- a/lib/cretonne/src/legalizer/globalvar.rs +++ b/lib/cretonne/src/legalizer/globalvar.rs @@ -18,7 +18,7 @@ pub fn expand_global_addr( // Unpack the instruction. let gv = match func.dfg[inst] { ir::InstructionData::UnaryGlobalVar { opcode, global_var } => { - assert_eq!(opcode, ir::Opcode::GlobalAddr); + debug_assert_eq!(opcode, ir::Opcode::GlobalAddr); global_var } _ => panic!("Wanted global_addr: {}", func.dfg.display_inst(inst, None)), diff --git a/lib/cretonne/src/legalizer/heap.rs b/lib/cretonne/src/legalizer/heap.rs index af40181d1a..37ce226351 100644 --- a/lib/cretonne/src/legalizer/heap.rs +++ b/lib/cretonne/src/legalizer/heap.rs @@ -24,7 +24,7 @@ pub fn expand_heap_addr( arg, imm, } => { - assert_eq!(opcode, ir::Opcode::HeapAddr); + debug_assert_eq!(opcode, ir::Opcode::HeapAddr); (heap, arg, imm.into()) } _ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)), diff --git a/lib/cretonne/src/legalizer/mod.rs b/lib/cretonne/src/legalizer/mod.rs index 81dde2f457..51198d05a0 100644 --- a/lib/cretonne/src/legalizer/mod.rs +++ b/lib/cretonne/src/legalizer/mod.rs @@ -107,7 +107,7 @@ pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, is } // Include legalization patterns that were generated by `gen_legalizer.py` from the `XForms` in -// `meta/cretonne/legalize.py`. +// `lib/cretonne/meta/base/legalize.py`. // // Concretely, this defines private functions `narrow()`, and `expand()`. 
include!(concat!(env!("OUT_DIR"), "/legalizer.rs")); @@ -248,7 +248,7 @@ fn expand_fconst( _isa: &TargetIsa, ) { let ty = func.dfg.value_type(func.dfg.first_result(inst)); - assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty); + debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty); // In the future, we may want to generate constant pool entries for these constants, but for // now use an `iconst` and a bit cast. diff --git a/lib/cretonne/src/legalizer/split.rs b/lib/cretonne/src/legalizer/split.rs index 76ad05a2bc..cdc60974e0 100644 --- a/lib/cretonne/src/legalizer/split.rs +++ b/lib/cretonne/src/legalizer/split.rs @@ -128,7 +128,7 @@ fn split_any( while let Some(repair) = repairs.pop() { for (_, inst) in cfg.pred_iter(repair.ebb) { let branch_opc = pos.func.dfg[inst].opcode(); - assert!( + debug_assert!( branch_opc.is_branch(), "Predecessor not a branch: {}", pos.func.dfg.display_inst(inst, None) @@ -199,7 +199,7 @@ fn split_value( // This is an instruction result. See if the value was created by a `concat` // instruction. if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] { - assert_eq!(num, 0); + debug_assert_eq!(num, 0); if opcode == concat { reuse = Some((args[0], args[1])); } diff --git a/lib/cretonne/src/lib.rs b/lib/cretonne/src/lib.rs index ddb391852d..9656f6d1e1 100644 --- a/lib/cretonne/src/lib.rs +++ b/lib/cretonne/src/lib.rs @@ -1,5 +1,8 @@ //! Cretonne code generation library. 
-#![deny(missing_docs)] + +#![deny(missing_docs, + trivial_numeric_casts, + unused_extern_crates)] // Turns on alloc feature if no_std #![cfg_attr(not(feature = "std"), no_std)] @@ -46,11 +49,13 @@ mod abi; mod bitset; mod constant_hash; mod context; +mod divconst_magic_numbers; mod iterators; mod legalizer; mod licm; mod partition_slice; mod predicates; +mod preopt; mod ref_slice; mod regalloc; mod scoped_hash_map; diff --git a/lib/cretonne/src/predicates.rs b/lib/cretonne/src/predicates.rs index 6ce3e0a799..63d2e79af3 100644 --- a/lib/cretonne/src/predicates.rs +++ b/lib/cretonne/src/predicates.rs @@ -1,7 +1,7 @@ //! Predicate functions for testing instruction fields. //! //! This module defines functions that are used by the instruction predicates defined by -//! `lib/cretonne/meta/cretonne/predicates.py` classes. +//! `lib/cretonne/meta/cdsl/predicates.py` classes. //! //! The predicates the operate on integer fields use `Into` as a shared trait bound. This //! bound is implemented by all the native integer types as well as `Imm64`. diff --git a/lib/cretonne/src/preopt.rs b/lib/cretonne/src/preopt.rs new file mode 100644 index 0000000000..8e4bef8da3 --- /dev/null +++ b/lib/cretonne/src/preopt.rs @@ -0,0 +1,521 @@ +//! A pre-legalization rewriting pass. + +#![allow(non_snake_case)] + +use cursor::{Cursor, FuncCursor}; +use ir::dfg::ValueDef; +use ir::{Function, InstructionData, Value, DataFlowGraph, InstBuilder, Type}; +use ir::Inst; +use ir::types::{I32, I64}; +use ir::instructions::Opcode; +use divconst_magic_numbers::{MU32, MU64, MS32, MS64}; +use divconst_magic_numbers::{magicU32, magicU64, magicS32, magicS64}; +use timing; + + +//---------------------------------------------------------------------- +// +// Pattern-match helpers and transformation for div and rem by constants. + +// Simple math helpers + +// if `x` is a power of two, or the negation thereof, return the power along +// with a boolean that indicates whether `x` is negative. Else return None. 
+#[inline] +fn isPowerOf2_S32(x: i32) -> Option<(bool, u32)> { + // We have to special-case this because abs(x) isn't representable. + if x == -0x8000_0000 { + return Some((true, 31)); + } + let abs_x = i32::wrapping_abs(x) as u32; + if abs_x.is_power_of_two() { + return Some((x < 0, abs_x.trailing_zeros())); + } + None +} + +// Same comments as for isPowerOf2_S64 apply. +#[inline] +fn isPowerOf2_S64(x: i64) -> Option<(bool, u32)> { + // We have to special-case this because abs(x) isn't representable. + if x == -0x8000_0000_0000_0000 { + return Some((true, 63)); + } + let abs_x = i64::wrapping_abs(x) as u64; + if abs_x.is_power_of_two() { + return Some((x < 0, abs_x.trailing_zeros())); + } + None +} + +#[derive(Debug)] +enum DivRemByConstInfo { + DivU32(Value, u32), // In all cases, the arguments are: + DivU64(Value, u64), // left operand, right operand + DivS32(Value, i32), + DivS64(Value, i64), + RemU32(Value, u32), + RemU64(Value, u64), + RemS32(Value, i32), + RemS64(Value, i64), +} + +// Possibly create a DivRemByConstInfo from the given components, by +// figuring out which, if any, of the 8 cases apply, and also taking care to +// sanity-check the immediate. 
+fn package_up_divrem_info( + argL: Value, + argL_ty: Type, + argRs: i64, + isSigned: bool, + isRem: bool, +) -> Option { + let argRu: u64 = argRs as u64; + if !isSigned && argL_ty == I32 && argRu < 0x1_0000_0000 { + let con = if isRem { + DivRemByConstInfo::RemU32 + } else { + DivRemByConstInfo::DivU32 + }; + return Some(con(argL, argRu as u32)); + } + if !isSigned && argL_ty == I64 { + // unsigned 64, no range constraint + let con = if isRem { + DivRemByConstInfo::RemU64 + } else { + DivRemByConstInfo::DivU64 + }; + return Some(con(argL, argRu)); + } + if isSigned && argL_ty == I32 && (argRu <= 0x7fff_ffff || argRu >= 0xffff_ffff_8000_0000) { + let con = if isRem { + DivRemByConstInfo::RemS32 + } else { + DivRemByConstInfo::DivS32 + }; + return Some(con(argL, argRu as i32)); + } + if isSigned && argL_ty == I64 { + // signed 64, no range constraint + let con = if isRem { + DivRemByConstInfo::RemS64 + } else { + DivRemByConstInfo::DivS64 + }; + return Some(con(argL, argRu as i64)); + } + None +} + +// Examine `idata` to see if it is a div or rem by a constant, and if so +// return the operands, signedness, operation size and div-vs-rem-ness in a +// handy bundle. +fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option { + let idata: &InstructionData = &dfg[inst]; + + if let &InstructionData::BinaryImm { opcode, arg, imm } = idata { + let (isSigned, isRem) = match opcode { + Opcode::UdivImm => (false, false), + Opcode::UremImm => (false, true), + Opcode::SdivImm => (true, false), + Opcode::SremImm => (true, true), + _other => return None, + }; + // Pull the operation size (type) from the left arg + let argL_ty = dfg.value_type(arg); + return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem); + } + + // TODO: should we actually bother to do this (that is, manually match + // the case that the second argument is an iconst)? 
Or should we assume + // that some previous constant propagation pass has pushed all such + // immediates to their use points, creating BinaryImm instructions + // instead? For now we take the conservative approach. + if let &InstructionData::Binary { opcode, args } = idata { + let (isSigned, isRem) = match opcode { + Opcode::Udiv => (false, false), + Opcode::Urem => (false, true), + Opcode::Sdiv => (true, false), + Opcode::Srem => (true, true), + _other => return None, + }; + let argR: Value = args[1]; + if let Some(simm64) = get_const(argR, dfg) { + let argL: Value = args[0]; + // Pull the operation size (type) from the left arg + let argL_ty = dfg.value_type(argL); + return package_up_divrem_info(argL, argL_ty, simm64, isSigned, isRem); + } + } + + None +} + +// Actually do the transformation given a bundle containing the relevant +// information. `divrem_info` describes a div or rem by a constant, that +// `pos` currently points at, and `inst` is the associated instruction. +// `inst` is replaced by a sequence of other operations that calculate the +// same result. Note that there are various `divrem_info` cases where we +// cannot do any transformation, in which case `inst` is left unchanged. 
+fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) { + let isRem = match *divrem_info { + DivRemByConstInfo::DivU32(_, _) | + DivRemByConstInfo::DivU64(_, _) | + DivRemByConstInfo::DivS32(_, _) | + DivRemByConstInfo::DivS64(_, _) => false, + DivRemByConstInfo::RemU32(_, _) | + DivRemByConstInfo::RemU64(_, _) | + DivRemByConstInfo::RemS32(_, _) | + DivRemByConstInfo::RemS64(_, _) => true, + }; + + match divrem_info { + + // -------------------- U32 -------------------- + + // U32 div, rem by zero: ignore + &DivRemByConstInfo::DivU32(_n1, 0) | + &DivRemByConstInfo::RemU32(_n1, 0) => {} + + // U32 div by 1: identity + // U32 rem by 1: zero + &DivRemByConstInfo::DivU32(n1, 1) | + &DivRemByConstInfo::RemU32(n1, 1) => { + if isRem { + pos.func.dfg.replace(inst).iconst(I32, 0); + } else { + pos.func.dfg.replace(inst).copy(n1); + } + } + + // U32 div, rem by a power-of-2 + &DivRemByConstInfo::DivU32(n1, d) | + &DivRemByConstInfo::RemU32(n1, d) if d.is_power_of_two() => { + debug_assert!(d >= 2); + // compute k where d == 2^k + let k = d.trailing_zeros(); + debug_assert!(k >= 1 && k <= 31); + if isRem { + let mask = (1u64 << k) - 1; + pos.func.dfg.replace(inst).band_imm(n1, mask as i64); + } else { + pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); + } + } + + // U32 div, rem by non-power-of-2 + &DivRemByConstInfo::DivU32(n1, d) | + &DivRemByConstInfo::RemU32(n1, d) => { + debug_assert!(d >= 3); + let MU32 { + mulBy, + doAdd, + shiftBy, + } = magicU32(d); + let qf; // final quotient + let q0 = pos.ins().iconst(I32, mulBy as i64); + let q1 = pos.ins().umulhi(n1, q0); + if doAdd { + debug_assert!(shiftBy >= 1 && shiftBy <= 32); + let t1 = pos.ins().isub(n1, q1); + let t2 = pos.ins().ushr_imm(t1, 1); + let t3 = pos.ins().iadd(t2, q1); + // I never found any case where shiftBy == 1 here. + // So there's no attempt to fold out a zero shift. 
+ debug_assert!(shiftBy != 1); + qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64); + } else { + debug_assert!(shiftBy >= 0 && shiftBy <= 31); + // Whereas there are known cases here for shiftBy == 0. + if shiftBy > 0 { + qf = pos.ins().ushr_imm(q1, shiftBy as i64); + } else { + qf = q1; + } + } + // Now qf holds the final quotient. If necessary calculate the + // remainder instead. + if isRem { + let tt = pos.ins().imul_imm(qf, d as i64); + pos.func.dfg.replace(inst).isub(n1, tt); + } else { + pos.func.dfg.replace(inst).copy(qf); + } + } + + // -------------------- U64 -------------------- + + // U64 div, rem by zero: ignore + &DivRemByConstInfo::DivU64(_n1, 0) | + &DivRemByConstInfo::RemU64(_n1, 0) => {} + + // U64 div by 1: identity + // U64 rem by 1: zero + &DivRemByConstInfo::DivU64(n1, 1) | + &DivRemByConstInfo::RemU64(n1, 1) => { + if isRem { + pos.func.dfg.replace(inst).iconst(I64, 0); + } else { + pos.func.dfg.replace(inst).copy(n1); + } + } + + // U64 div, rem by a power-of-2 + &DivRemByConstInfo::DivU64(n1, d) | + &DivRemByConstInfo::RemU64(n1, d) if d.is_power_of_two() => { + debug_assert!(d >= 2); + // compute k where d == 2^k + let k = d.trailing_zeros(); + debug_assert!(k >= 1 && k <= 63); + if isRem { + let mask = (1u64 << k) - 1; + pos.func.dfg.replace(inst).band_imm(n1, mask as i64); + } else { + pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); + } + } + + // U64 div, rem by non-power-of-2 + &DivRemByConstInfo::DivU64(n1, d) | + &DivRemByConstInfo::RemU64(n1, d) => { + debug_assert!(d >= 3); + let MU64 { + mulBy, + doAdd, + shiftBy, + } = magicU64(d); + let qf; // final quotient + let q0 = pos.ins().iconst(I64, mulBy as i64); + let q1 = pos.ins().umulhi(n1, q0); + if doAdd { + debug_assert!(shiftBy >= 1 && shiftBy <= 64); + let t1 = pos.ins().isub(n1, q1); + let t2 = pos.ins().ushr_imm(t1, 1); + let t3 = pos.ins().iadd(t2, q1); + // I never found any case where shiftBy == 1 here. + // So there's no attempt to fold out a zero shift. 
+ debug_assert!(shiftBy != 1); + qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64); + } else { + debug_assert!(shiftBy >= 0 && shiftBy <= 63); + // Whereas there are known cases here for shiftBy == 0. + if shiftBy > 0 { + qf = pos.ins().ushr_imm(q1, shiftBy as i64); + } else { + qf = q1; + } + } + // Now qf holds the final quotient. If necessary calculate the + // remainder instead. + if isRem { + let tt = pos.ins().imul_imm(qf, d as i64); + pos.func.dfg.replace(inst).isub(n1, tt); + } else { + pos.func.dfg.replace(inst).copy(qf); + } + } + + // -------------------- S32 -------------------- + + // S32 div, rem by zero or -1: ignore + &DivRemByConstInfo::DivS32(_n1, -1) | + &DivRemByConstInfo::RemS32(_n1, -1) | + &DivRemByConstInfo::DivS32(_n1, 0) | + &DivRemByConstInfo::RemS32(_n1, 0) => {} + + // S32 div by 1: identity + // S32 rem by 1: zero + &DivRemByConstInfo::DivS32(n1, 1) | + &DivRemByConstInfo::RemS32(n1, 1) => { + if isRem { + pos.func.dfg.replace(inst).iconst(I32, 0); + } else { + pos.func.dfg.replace(inst).copy(n1); + } + } + + &DivRemByConstInfo::DivS32(n1, d) | + &DivRemByConstInfo::RemS32(n1, d) => { + if let Some((isNeg, k)) = isPowerOf2_S32(d) { + // k can be 31 only in the case that d is -2^31. + debug_assert!(k >= 1 && k <= 31); + let t1 = if k - 1 == 0 { + n1 + } else { + pos.ins().sshr_imm(n1, (k - 1) as i64) + }; + let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64); + let t3 = pos.ins().iadd(n1, t2); + if isRem { + // S32 rem by a power-of-2 + let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64); + // Curiously, we don't care here what the sign of d is. 
+ pos.func.dfg.replace(inst).isub(n1, t4); + } else { + // S32 div by a power-of-2 + let t4 = pos.ins().sshr_imm(t3, k as i64); + if isNeg { + pos.func.dfg.replace(inst).irsub_imm(t4, 0); + } else { + pos.func.dfg.replace(inst).copy(t4); + } + } + } else { + // S32 div, rem by a non-power-of-2 + debug_assert!(d < -2 || d > 2); + let MS32 { mulBy, shiftBy } = magicS32(d); + let q0 = pos.ins().iconst(I32, mulBy as i64); + let q1 = pos.ins().smulhi(n1, q0); + let q2 = if d > 0 && mulBy < 0 { + pos.ins().iadd(q1, n1) + } else if d < 0 && mulBy > 0 { + pos.ins().isub(q1, n1) + } else { + q1 + }; + debug_assert!(shiftBy >= 0 && shiftBy <= 31); + let q3 = if shiftBy == 0 { + q2 + } else { + pos.ins().sshr_imm(q2, shiftBy as i64) + }; + let t1 = pos.ins().ushr_imm(q3, 31); + let qf = pos.ins().iadd(q3, t1); + // Now qf holds the final quotient. If necessary calculate + // the remainder instead. + if isRem { + let tt = pos.ins().imul_imm(qf, d as i64); + pos.func.dfg.replace(inst).isub(n1, tt); + } else { + pos.func.dfg.replace(inst).copy(qf); + } + } + } + + // -------------------- S64 -------------------- + + // S64 div, rem by zero or -1: ignore + &DivRemByConstInfo::DivS64(_n1, -1) | + &DivRemByConstInfo::RemS64(_n1, -1) | + &DivRemByConstInfo::DivS64(_n1, 0) | + &DivRemByConstInfo::RemS64(_n1, 0) => {} + + // S64 div by 1: identity + // S64 rem by 1: zero + &DivRemByConstInfo::DivS64(n1, 1) | + &DivRemByConstInfo::RemS64(n1, 1) => { + if isRem { + pos.func.dfg.replace(inst).iconst(I64, 0); + } else { + pos.func.dfg.replace(inst).copy(n1); + } + } + + &DivRemByConstInfo::DivS64(n1, d) | + &DivRemByConstInfo::RemS64(n1, d) => { + if let Some((isNeg, k)) = isPowerOf2_S64(d) { + // k can be 63 only in the case that d is -2^63. 
+ debug_assert!(k >= 1 && k <= 63); + let t1 = if k - 1 == 0 { + n1 + } else { + pos.ins().sshr_imm(n1, (k - 1) as i64) + }; + let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64); + let t3 = pos.ins().iadd(n1, t2); + if isRem { + // S64 rem by a power-of-2 + let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k)); + // Curiously, we don't care here what the sign of d is. + pos.func.dfg.replace(inst).isub(n1, t4); + } else { + // S64 div by a power-of-2 + let t4 = pos.ins().sshr_imm(t3, k as i64); + if isNeg { + pos.func.dfg.replace(inst).irsub_imm(t4, 0); + } else { + pos.func.dfg.replace(inst).copy(t4); + } + } + } else { + // S64 div, rem by a non-power-of-2 + debug_assert!(d < -2 || d > 2); + let MS64 { mulBy, shiftBy } = magicS64(d); + let q0 = pos.ins().iconst(I64, mulBy); + let q1 = pos.ins().smulhi(n1, q0); + let q2 = if d > 0 && mulBy < 0 { + pos.ins().iadd(q1, n1) + } else if d < 0 && mulBy > 0 { + pos.ins().isub(q1, n1) + } else { + q1 + }; + debug_assert!(shiftBy >= 0 && shiftBy <= 63); + let q3 = if shiftBy == 0 { + q2 + } else { + pos.ins().sshr_imm(q2, shiftBy as i64) + }; + let t1 = pos.ins().ushr_imm(q3, 63); + let qf = pos.ins().iadd(q3, t1); + // Now qf holds the final quotient. If necessary calculate + // the remainder instead. + if isRem { + let tt = pos.ins().imul_imm(qf, d); + pos.func.dfg.replace(inst).isub(n1, tt); + } else { + pos.func.dfg.replace(inst).copy(qf); + } + } + } + + } +} + + +//---------------------------------------------------------------------- +// +// General pattern-match helpers. + +// Find out if `value` actually resolves to a constant, and if so what its +// value is. 
+fn get_const(value: Value, dfg: &DataFlowGraph) -> Option { + match dfg.value_def(value) { + ValueDef::Result(definingInst, resultNo) => { + let definingIData: &InstructionData = &dfg[definingInst]; + if let &InstructionData::UnaryImm { opcode, imm } = definingIData { + if opcode == Opcode::Iconst && resultNo == 0 { + return Some(imm.into()); + } + } + None + } + ValueDef::Param(_definingEbb, _paramNo) => None, + } +} + + +//---------------------------------------------------------------------- +// +// The main pre-opt pass. + +pub fn do_preopt(func: &mut Function) { + let _tt = timing::preopt(); + let mut pos = FuncCursor::new(func); + while let Some(_ebb) = pos.next_ebb() { + + while let Some(inst) = pos.next_inst() { + + //-- BEGIN -- division by constants ---------------- + + let mb_dri = get_div_info(inst, &pos.func.dfg); + if let Some(divrem_info) = mb_dri { + do_divrem_transformation(&divrem_info, &mut pos, inst); + continue; + } + + //-- END -- division by constants ------------------ + } + } +} diff --git a/lib/cretonne/src/regalloc/affinity.rs b/lib/cretonne/src/regalloc/affinity.rs index 1c50cbbbab..7d85ae8721 100644 --- a/lib/cretonne/src/regalloc/affinity.rs +++ b/lib/cretonne/src/regalloc/affinity.rs @@ -13,7 +13,7 @@ use ir::{AbiParam, ArgumentLoc}; use isa::{TargetIsa, RegInfo, RegClassIndex, OperandConstraint, ConstraintKind}; /// Preferred register allocation for an SSA value. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum Affinity { /// No affinity. /// diff --git a/lib/cretonne/src/regalloc/coalescing.rs b/lib/cretonne/src/regalloc/coalescing.rs index 5933170e33..2437fd1a03 100644 --- a/lib/cretonne/src/regalloc/coalescing.rs +++ b/lib/cretonne/src/regalloc/coalescing.rs @@ -1,9 +1,9 @@ -//! Constructing conventional SSA form. +//! Constructing Conventional SSA form. //! -//! Conventional SSA form is a subset of SSA form where any (transitively) phi-related values do -//! not interfere. 
We construct CSSA by building virtual registers that are as large as possible -//! and inserting copies where necessary such that all argument values passed to an EBB parameter -//! will belong to the same virtual register as the EBB parameter value itself. +//! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related +//! values do not interfere. We construct CSSA by building virtual registers that are as large as +//! possible and inserting copies where necessary such that all argument values passed to an EBB +//! parameter will belong to the same virtual register as the EBB parameter value itself. use cursor::{Cursor, EncCursor}; use dbg::DisplayList; @@ -27,7 +27,7 @@ use timing; // The coalescing algorithm implemented follows this paper fairly closely: // // Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and -// live-range identification (Vol. 37, pp. 25–32). ACM. http://doi.org/10.1145/543552.512534 +// live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534 // // We use a more efficient dominator forest representation (a linear stack) described here: // @@ -104,7 +104,7 @@ impl Coalescing { self.backedges.clear(); } - /// Convert `func` to conventional SSA form and build virtual registers in the process. + /// Convert `func` to Conventional SSA form and build virtual registers in the process. pub fn conventional_ssa( &mut self, isa: &TargetIsa, @@ -239,7 +239,7 @@ impl<'a> Context<'a> { // 1. It is defined in a dominating EBB and live-in to `ebb`. // 2. If is itself a parameter value for `ebb`. This case should already have been // eliminated by `isolate_conflicting_params()`. - assert!( + debug_assert!( lr.def() != ebb.into(), "{} parameter {} was missed by isolate_conflicting_params()", ebb, @@ -495,8 +495,8 @@ impl<'a> Context<'a> { // Second everything else in reverse layout order. Again, short forward branches get merged // first. 
There can also be backwards branches mixed in here, though, as long as they are // not loop backedges. - assert!(self.predecessors.is_empty()); - assert!(self.backedges.is_empty()); + debug_assert!(self.predecessors.is_empty()); + debug_assert!(self.backedges.is_empty()); for (pred_ebb, pred_inst) in self.cfg.pred_iter(ebb) { if self.preorder.dominates(ebb, pred_ebb) { self.backedges.push(pred_inst); @@ -958,7 +958,8 @@ impl VirtualCopies { /// Indicate that `param` is now fully merged. pub fn merged_param(&mut self, param: Value, func: &Function) { - assert_eq!(self.params.pop(), Some(param)); + let popped = self.params.pop(); + debug_assert_eq!(popped, Some(param)); // The domtree pre-order in `self.params` guarantees that all parameters defined at the // same EBB will be adjacent. This means we can see when all parameters at an EBB have been diff --git a/lib/cretonne/src/regalloc/coloring.rs b/lib/cretonne/src/regalloc/coloring.rs index 74d80e1832..d7ab5dd0f3 100644 --- a/lib/cretonne/src/regalloc/coloring.rs +++ b/lib/cretonne/src/regalloc/coloring.rs @@ -23,7 +23,7 @@ //! operands are allowed to read spilled values, but each such instance must be counted as using //! a register. //! -//! 5. The code must be in conventional SSA form. Among other things, this means that values passed +//! 5. The code must be in Conventional SSA form. Among other things, this means that values passed //! as arguments when branching to an EBB must belong to the same virtual register as the //! corresponding EBB argument value. //! @@ -246,7 +246,7 @@ impl<'a> Context<'a> { /// Return the set of remaining allocatable registers after filtering out the dead arguments. 
fn color_entry_params(&mut self, args: &[LiveValue]) -> AvailableRegs { let sig = &self.cur.func.signature; - assert_eq!(sig.params.len(), args.len()); + debug_assert_eq!(sig.params.len(), args.len()); let mut regs = AvailableRegs::new(&self.usable_regs); @@ -271,7 +271,7 @@ impl<'a> Context<'a> { } // The spiller will have assigned an incoming stack slot already. - Affinity::Stack => assert!(abi.location.is_stack()), + Affinity::Stack => debug_assert!(abi.location.is_stack()), // This is a ghost value, unused in the function. Don't assign it to a location // either. Affinity::None => {} @@ -340,7 +340,7 @@ impl<'a> Context<'a> { } else { // This is a multi-way branch like `br_table`. We only support arguments on // single-destination branches. - assert_eq!( + debug_assert_eq!( self.cur.func.dfg.inst_variable_args(inst).len(), 0, "Can't handle EBB arguments: {}", @@ -586,7 +586,7 @@ impl<'a> Context<'a> { // Now handle the EBB arguments. let br_args = self.cur.func.dfg.inst_variable_args(inst); let dest_args = self.cur.func.dfg.ebb_params(dest); - assert_eq!(br_args.len(), dest_args.len()); + debug_assert_eq!(br_args.len(), dest_args.len()); for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) { // The first time we encounter a branch to `dest`, we get to pick the location. The // following times we see a branch to `dest`, we must follow suit. 
@@ -631,7 +631,7 @@ impl<'a> Context<'a> { fn color_ebb_params(&mut self, inst: Inst, dest: Ebb) { let br_args = self.cur.func.dfg.inst_variable_args(inst); let dest_args = self.cur.func.dfg.ebb_params(dest); - assert_eq!(br_args.len(), dest_args.len()); + debug_assert_eq!(br_args.len(), dest_args.len()); for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) { match self.cur.func.locations[dest_arg] { ValueLoc::Unassigned => { @@ -741,7 +741,7 @@ impl<'a> Context<'a> { // It's technically possible for a call instruction to have fixed results before the // variable list of results, but we have no known instances of that. // Just assume all results are variable return values. - assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len()); + debug_assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len()); for (i, lv) in defs.iter().enumerate() { let abi = self.cur.func.dfg.signatures[sig].returns[i]; if let ArgumentLoc::Reg(reg) = abi.location { @@ -787,7 +787,7 @@ impl<'a> Context<'a> { } let ok = self.solver.add_fixed_output(rc, reg); - assert!(ok, "Couldn't clear fixed output interference for {}", value); + debug_assert!(ok, "Couldn't clear fixed output interference for {}", value); } self.cur.func.locations[value] = ValueLoc::Reg(reg); } @@ -858,11 +858,8 @@ impl<'a> Context<'a> { Ok(regs) => return regs, Err(SolverError::Divert(rc)) => { // Do we have any live-through `rc` registers that are not already variables? 
- assert!( - self.try_add_var(rc, throughs), - "Ran out of registers in {}", - rc - ); + let added = self.try_add_var(rc, throughs); + debug_assert!(added, "Ran out of registers in {}", rc); } Err(SolverError::Global(value)) => { dbg!("Not enough global registers for {}, trying as local", value); @@ -908,7 +905,7 @@ impl<'a> Context<'a> { let inst = self.cur.current_inst().expect("Not on an instruction"); let ctx = self.liveness.context(&self.cur.func.layout); - match self.cur.func.dfg[inst].analyze_branch(&self.cur.func.dfg.value_lists) { + match self.cur.func.dfg.analyze_branch(inst) { NotABranch => false, SingleDest(ebb, _) => { let lr = &self.liveness[value]; @@ -941,7 +938,7 @@ impl<'a> Context<'a> { // It is very unlikely (impossible?) that we would need more than one spill per top-level // register class, so avoid allocation by using a fixed array here. let mut slot = [PackedOption::default(); 8]; - assert!(spills <= slot.len(), "Too many spills ({})", spills); + debug_assert!(spills <= slot.len(), "Too many spills ({})", spills); for m in self.solver.moves() { match *m { diff --git a/lib/cretonne/src/regalloc/context.rs b/lib/cretonne/src/regalloc/context.rs index 765ecf155c..a8284a9579 100644 --- a/lib/cretonne/src/regalloc/context.rs +++ b/lib/cretonne/src/regalloc/context.rs @@ -90,7 +90,7 @@ impl Context { verify_liveness(isa, func, cfg, &self.liveness)?; } - // Pass: Coalesce and create conventional SSA form. + // Pass: Coalesce and create Conventional SSA form. 
self.coalescing.conventional_ssa( isa, func, diff --git a/lib/cretonne/src/regalloc/live_value_tracker.rs b/lib/cretonne/src/regalloc/live_value_tracker.rs index de300ced0e..bedd0480e5 100644 --- a/lib/cretonne/src/regalloc/live_value_tracker.rs +++ b/lib/cretonne/src/regalloc/live_value_tracker.rs @@ -208,7 +208,7 @@ impl LiveValueTracker { let first_arg = self.live.values.len(); for &value in dfg.ebb_params(ebb) { let lr = &liveness[value]; - assert_eq!(lr.def(), ebb.into()); + debug_assert_eq!(lr.def(), ebb.into()); match lr.def_local_end().into() { ExpandedProgramPoint::Inst(endpoint) => { self.live.push(value, endpoint, lr); @@ -216,7 +216,7 @@ impl LiveValueTracker { ExpandedProgramPoint::Ebb(local_ebb) => { // This is a dead EBB parameter which is not even live into the first // instruction in the EBB. - assert_eq!( + debug_assert_eq!( local_ebb, ebb, "EBB parameter live range ends at wrong EBB header" @@ -261,7 +261,7 @@ impl LiveValueTracker { ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) { // Save a copy of the live values before any branches or jumps that could be somebody's // immediate dominator. - match dfg[inst].analyze_branch(&dfg.value_lists) { + match dfg.analyze_branch(inst) { BranchInfo::NotABranch => {} _ => self.save_idom_live_set(inst), } @@ -274,7 +274,7 @@ impl LiveValueTracker { let first_def = self.live.values.len(); for &value in dfg.inst_results(inst) { let lr = &liveness[value]; - assert_eq!(lr.def(), inst.into()); + debug_assert_eq!(lr.def(), inst.into()); match lr.def_local_end().into() { ExpandedProgramPoint::Inst(endpoint) => { self.live.push(value, endpoint, lr); diff --git a/lib/cretonne/src/regalloc/liveness.rs b/lib/cretonne/src/regalloc/liveness.rs index 83b63366a5..9a76cb5899 100644 --- a/lib/cretonne/src/regalloc/liveness.rs +++ b/lib/cretonne/src/regalloc/liveness.rs @@ -252,7 +252,7 @@ fn extend_to_use( forest: &mut LiveRangeForest, ) { // This is our scratch working space, and we'll leave it empty when we return. 
- assert!(worklist.is_empty()); + debug_assert!(worklist.is_empty()); // Extend the range locally in `ebb`. // If there already was a live interval in that block, we're done. @@ -339,7 +339,7 @@ impl Liveness { let old = self.ranges.insert( LiveRange::new(value, def.into(), affinity), ); - assert!(old.is_none(), "{} already has a live range", value); + debug_assert!(old.is_none(), "{} already has a live range", value); } /// Move the definition of `value` to `def`. @@ -368,7 +368,7 @@ impl Liveness { debug_assert_eq!(Some(ebb), layout.inst_ebb(user)); let lr = self.ranges.get_mut(value).expect("Value has no live range"); let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest); - assert!(!livein, "{} should already be live in {}", value, ebb); + debug_assert!(!livein, "{} should already be live in {}", value, ebb); &mut lr.affinity } diff --git a/lib/cretonne/src/regalloc/liverange.rs b/lib/cretonne/src/regalloc/liverange.rs index 93d5b0c244..dbde52f05d 100644 --- a/lib/cretonne/src/regalloc/liverange.rs +++ b/lib/cretonne/src/regalloc/liverange.rs @@ -253,7 +253,7 @@ impl GenLiveRange { order.cmp(to, self.def_begin) != Ordering::Less { let to_pp = to.into(); - assert_ne!( + debug_assert_ne!( to_pp, self.def_begin, "Can't use value in the defining instruction." diff --git a/lib/cretonne/src/regalloc/reload.rs b/lib/cretonne/src/regalloc/reload.rs index 10b803c831..63e7354453 100644 --- a/lib/cretonne/src/regalloc/reload.rs +++ b/lib/cretonne/src/regalloc/reload.rs @@ -146,7 +146,7 @@ impl<'a> Context<'a> { ); if self.cur.func.layout.entry_block() == Some(ebb) { - assert_eq!(liveins.len(), 0); + debug_assert_eq!(liveins.len(), 0); self.visit_entry_params(ebb, args); } else { self.visit_ebb_params(ebb, args); @@ -156,7 +156,7 @@ impl<'a> Context<'a> { /// Visit the parameters on the entry block. /// These values have ABI constraints from the function signature. 
fn visit_entry_params(&mut self, ebb: Ebb, args: &[LiveValue]) { - assert_eq!(self.cur.func.signature.params.len(), args.len()); + debug_assert_eq!(self.cur.func.signature.params.len(), args.len()); self.cur.goto_first_inst(ebb); for (arg_idx, arg) in args.iter().enumerate() { @@ -176,7 +176,7 @@ impl<'a> Context<'a> { } } ArgumentLoc::Stack(_) => { - assert!(arg.affinity.is_stack()); + debug_assert!(arg.affinity.is_stack()); } ArgumentLoc::Unassigned => panic!("Unexpected ABI location"), } @@ -204,7 +204,7 @@ impl<'a> Context<'a> { ); // Identify reload candidates. - assert!(self.candidates.is_empty()); + debug_assert!(self.candidates.is_empty()); self.find_candidates(inst, constraints); // Insert fill instructions before `inst` and replace `cand.value` with the filled value. @@ -299,7 +299,7 @@ impl<'a> Context<'a> { } } - // Find reload candidates for `inst` and add them to `self.condidates`. + // Find reload candidates for `inst` and add them to `self.candidates`. // // These are uses of spilled values where the operand constraint requires a register. fn find_candidates(&mut self, inst: Inst, constraints: &RecipeConstraints) { @@ -376,7 +376,7 @@ fn handle_abi_args( isa: &TargetIsa, liveness: &Liveness, ) { - assert_eq!(abi_types.len(), var_args.len()); + debug_assert_eq!(abi_types.len(), var_args.len()); for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) { if abi.location.is_reg() { let lv = liveness.get(arg).expect("Missing live range for ABI arg"); diff --git a/lib/cretonne/src/regalloc/solver.rs b/lib/cretonne/src/regalloc/solver.rs index db5efcab06..7824444eec 100644 --- a/lib/cretonne/src/regalloc/solver.rs +++ b/lib/cretonne/src/regalloc/solver.rs @@ -566,7 +566,7 @@ impl Solver { dbg!("-> converting variable {} to a fixed constraint", v); // The spiller is responsible for ensuring that all constraints on the uses of a // value are compatible. 
- assert!( + debug_assert!( v.constraint.contains(to), "Incompatible constraints for {}", value @@ -666,7 +666,7 @@ impl Solver { // No variable, then it must be a fixed reassignment. if let Some(a) = self.assignments.get(value) { dbg!("-> already fixed assignment {}", a); - assert!( + debug_assert!( constraint.contains(a.to), "Incompatible constraints for {}", value @@ -709,7 +709,7 @@ impl Solver { /// Call this method to indicate that there will be no more fixed input reassignments added /// and prepare for the output side constraints. pub fn inputs_done(&mut self) { - assert!(!self.has_fixed_input_conflicts()); + debug_assert!(!self.has_fixed_input_conflicts()); // At this point, `regs_out` contains the `to` side of the input reassignments, and the // `from` side has already been marked as available in `regs_in`. @@ -747,7 +747,7 @@ impl Solver { // interference constraints on the output side. // Variables representing tied operands will get their `is_output` flag set again later. if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { - assert!(v.is_input); + debug_assert!(v.is_input); v.is_output = false; return; } @@ -783,7 +783,7 @@ impl Solver { // Check if a variable was created. if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { - assert!(v.is_input); + debug_assert!(v.is_input); v.is_output = true; v.is_global = is_global; return None; @@ -1027,7 +1027,7 @@ impl Solver { /// Returns the number of spills that had to be emitted. 
pub fn schedule_moves(&mut self, regs: &AllocatableSet) -> usize { self.collect_moves(); - assert!(self.fills.is_empty()); + debug_assert!(self.fills.is_empty()); let mut num_spill_slots = 0; let mut avail = regs.clone(); diff --git a/lib/cretonne/src/regalloc/spilling.rs b/lib/cretonne/src/regalloc/spilling.rs index 5f6677d650..3c3f805cbf 100644 --- a/lib/cretonne/src/regalloc/spilling.rs +++ b/lib/cretonne/src/regalloc/spilling.rs @@ -243,7 +243,7 @@ impl<'a> Context<'a> { debug_assert_eq!(self.cur.current_ebb(), Some(ebb)); // We may need to resolve register constraints if there are any noteworthy uses. - assert!(self.reg_uses.is_empty()); + debug_assert!(self.reg_uses.is_empty()); self.collect_reg_uses(inst, ebb, constraints); // Calls usually have fixed register uses. diff --git a/lib/cretonne/src/regalloc/virtregs.rs b/lib/cretonne/src/regalloc/virtregs.rs index df2dbca78e..097a66cccd 100644 --- a/lib/cretonne/src/regalloc/virtregs.rs +++ b/lib/cretonne/src/regalloc/virtregs.rs @@ -141,7 +141,7 @@ impl VirtRegs { func: &Function, preorder: &DominatorTreePreorder, ) -> VirtReg { - assert_eq!(self.get(single), None, "Expected singleton {}", single); + debug_assert_eq!(self.get(single), None, "Expected singleton {}", single); // Make sure `big` has a vreg. let vreg = self.get(big).unwrap_or_else(|| { @@ -209,7 +209,7 @@ impl VirtRegs { } } - assert_eq!( + debug_assert_eq!( values.len(), singletons + cleared, "Can't unify partial virtual registers" diff --git a/lib/cretonne/src/result.rs b/lib/cretonne/src/result.rs index 2b89c774d3..fcff7a6e65 100644 --- a/lib/cretonne/src/result.rs +++ b/lib/cretonne/src/result.rs @@ -29,7 +29,7 @@ pub enum CtonError { /// Cretonne can compile very large and complicated functions, but the [implementation has /// limits][limits] that cause compilation to fail when they are exceeded. 
/// - /// [limits]: http://cretonne.readthedocs.io/en/latest/langref.html#implementation-limits + /// [limits]: https://cretonne.readthedocs.io/en/latest/langref.html#implementation-limits #[fail(display = "Implementation limit exceeded")] ImplLimitExceeded, diff --git a/lib/cretonne/src/settings.rs b/lib/cretonne/src/settings.rs index b6ef9eae30..9a93b4a00c 100644 --- a/lib/cretonne/src/settings.rs +++ b/lib/cretonne/src/settings.rs @@ -312,7 +312,7 @@ pub mod detail { } // Include code generated by `meta/gen_settings.py`. This file contains a public `Flags` struct -// with an impl for all of the settings defined in `meta/cretonne/settings.py`. +// with an impl for all of the settings defined in `lib/cretonne/meta/base/settings.py`. include!(concat!(env!("OUT_DIR"), "/settings.rs")); /// Wrapper containing flags and optionally a `TargetIsa` trait object. diff --git a/lib/cretonne/src/stack_layout.rs b/lib/cretonne/src/stack_layout.rs index 796850e059..f1cf35f150 100644 --- a/lib/cretonne/src/stack_layout.rs +++ b/lib/cretonne/src/stack_layout.rs @@ -7,8 +7,8 @@ use std::cmp::{min, max}; /// Compute the stack frame layout. /// -/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Local` -/// stack slots. +/// Determine the total size of this stack frame and assign offsets to all `Spill` and +/// `Explicit` stack slots. /// /// The total frame size will be a multiple of `alignment` which must be a power of two. 
/// @@ -19,13 +19,13 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> Result Result { - incoming_min = min(incoming_min, slot.offset); + incoming_min = min(incoming_min, slot.offset.unwrap()); } StackSlotKind::OutgoingArg => { - let offset = slot.offset.checked_add(slot.size as StackOffset).ok_or( - CtonError::ImplLimitExceeded, - )?; + let offset = slot.offset + .unwrap() + .checked_add(slot.size as StackOffset) + .ok_or(CtonError::ImplLimitExceeded)?; outgoing_max = max(outgoing_max, offset); } StackSlotKind::SpillSlot | - StackSlotKind::Local | + StackSlotKind::ExplicitSlot | StackSlotKind::EmergencySlot => { - // Determine the smallest alignment of any local or spill slot. + // Determine the smallest alignment of any explicit or spill slot. min_align = slot.alignment(min_align); } } } - // Lay out spill slots and locals below the incoming arguments. + // Lay out spill slots and explicit slots below the incoming arguments. // The offset is negative, growing downwards. // Start with the smallest alignments for better packing. let mut offset = incoming_min; - assert!(min_align.is_power_of_two()); + debug_assert!(min_align.is_power_of_two()); while min_align <= alignment { for ss in frame.keys() { let slot = frame[ss].clone(); - // Pick out locals and spill slots with exact alignment `min_align`. + // Pick out explicit and spill slots with exact alignment `min_align`. 
match slot.kind { - StackSlotKind::SpillSlot | StackSlotKind::Local => { + StackSlotKind::SpillSlot | + StackSlotKind::ExplicitSlot | + StackSlotKind::EmergencySlot => { if slot.alignment(alignment) != min_align { continue; } } - _ => continue, + StackSlotKind::IncomingArg | + StackSlotKind::OutgoingArg => continue, } offset = offset.checked_sub(slot.size as StackOffset).ok_or( @@ -110,7 +114,7 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> Result LocationVerifier<'a> { slot.kind ); } - if slot.offset != offset { + if slot.offset.unwrap() != offset { return err!( inst, "ABI expects {} at stack offset {}, but {} is at {}", value, offset, ss, - slot.offset + slot.offset.unwrap() ); } } else { @@ -274,7 +274,7 @@ impl<'a> LocationVerifier<'a> { }; let dfg = &self.func.dfg; - match dfg[inst].analyze_branch(&dfg.value_lists) { + match dfg.analyze_branch(inst) { NotABranch => { panic!( "No branch information for {}", diff --git a/lib/cretonne/src/verifier/mod.rs b/lib/cretonne/src/verifier/mod.rs index 821bc3d623..d8c2cb6dc3 100644 --- a/lib/cretonne/src/verifier/mod.rs +++ b/lib/cretonne/src/verifier/mod.rs @@ -688,7 +688,7 @@ impl<'a> Verifier<'a> { } fn typecheck_variable_args(&self, inst: Inst) -> Result { - match self.func.dfg[inst].analyze_branch(&self.func.dfg.value_lists) { + match self.func.dfg.analyze_branch(inst) { BranchInfo::SingleDest(ebb, _) => { let iter = self.func.dfg.ebb_params(ebb).iter().map(|&v| { self.func.dfg.value_type(v) @@ -803,7 +803,7 @@ impl<'a> Verifier<'a> { slot ); } - if slot.offset != offset { + if slot.offset != Some(offset) { return err!( inst, "Outgoing stack argument {} should have offset {}: {} = {}", diff --git a/lib/cretonne/src/write.rs b/lib/cretonne/src/write.rs index ff60033af2..50acbbf553 100644 --- a/lib/cretonne/src/write.rs +++ b/lib/cretonne/src/write.rs @@ -477,29 +477,29 @@ mod tests { f.name = ExternalName::testcase("foo"); assert_eq!(f.to_string(), "function %foo() native {\n}\n"); - 
f.create_stack_slot(StackSlotData::new(StackSlotKind::Local, 4)); + f.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); assert_eq!( f.to_string(), - "function %foo() native {\n ss0 = local 4\n}\n" + "function %foo() native {\n ss0 = explicit_slot 4\n}\n" ); let ebb = f.dfg.make_ebb(); f.layout.append_ebb(ebb); assert_eq!( f.to_string(), - "function %foo() native {\n ss0 = local 4\n\nebb0:\n}\n" + "function %foo() native {\n ss0 = explicit_slot 4\n\nebb0:\n}\n" ); f.dfg.append_ebb_param(ebb, types::I8); assert_eq!( f.to_string(), - "function %foo() native {\n ss0 = local 4\n\nebb0(v0: i8):\n}\n" + "function %foo() native {\n ss0 = explicit_slot 4\n\nebb0(v0: i8):\n}\n" ); f.dfg.append_ebb_param(ebb, types::F32.by(4).unwrap()); assert_eq!( f.to_string(), - "function %foo() native {\n ss0 = local 4\n\nebb0(v0: i8, v1: f32x4):\n}\n" + "function %foo() native {\n ss0 = explicit_slot 4\n\nebb0(v0: i8, v1: f32x4):\n}\n" ); } } diff --git a/lib/filecheck/src/error.rs b/lib/filecheck/src/error.rs index af9657cdcc..d0736e682d 100644 --- a/lib/filecheck/src/error.rs +++ b/lib/filecheck/src/error.rs @@ -19,7 +19,7 @@ pub enum Error { UndefVariable(String), /// A pattern contains a back-reference to a variable that was defined in the same pattern. /// - /// For example, `check: Hello $(world=.*) $world`. Backreferences are not support. Often the + /// For example, `check: Hello $(world=.*) $world`. Backreferences are not supported. Often the /// desired effect can be achieved with the `sameln` check: /// /// ```text diff --git a/lib/filecheck/src/lib.rs b/lib/filecheck/src/lib.rs index b29377f9ac..40b7d04d40 100644 --- a/lib/filecheck/src/lib.rs +++ b/lib/filecheck/src/lib.rs @@ -1,5 +1,5 @@ //! This crate provides a text pattern matching library with functionality similar to the LLVM -//! project's [FileCheck command](http://llvm.org/docs/CommandGuide/FileCheck.html). +//! project's [FileCheck command](https://llvm.org/docs/CommandGuide/FileCheck.html). 
//! //! A list of directives is typically extracted from a file containing a test case. The test case //! is then run through the program under test, and its output matched against the directives. @@ -236,7 +236,9 @@ //! This will match `"one, two"` , but not `"one,two"`. Without the `$()`, trailing whitespace //! would be trimmed from the pattern. -#![deny(missing_docs)] +#![deny(missing_docs, + trivial_numeric_casts, + unused_extern_crates)] pub use error::{Error, Result}; pub use variable::{VariableMap, Value, NO_VARIABLES}; diff --git a/lib/filecheck/src/pattern.rs b/lib/filecheck/src/pattern.rs index 934150dca7..97977b191c 100644 --- a/lib/filecheck/src/pattern.rs +++ b/lib/filecheck/src/pattern.rs @@ -42,7 +42,7 @@ pub enum Part { } impl Part { - /// Get the variabled referenced by this part, if any. + /// Get the variable referenced by this part, if any. pub fn ref_var(&self) -> Option<&str> { match *self { Part::Var(ref var) | @@ -217,10 +217,10 @@ impl Pattern { } /// Compute the length of a regular expression terminated by `)` or `}`. -/// Handle nested and escaped parentheses in the rx, but don't actualy parse it. +/// Handle nested and escaped parentheses in the rx, but don't actually parse it. /// Return the position of the terminating brace or the length of the string. fn regex_prefix(s: &str) -> usize { - // The prevous char was a backslash. + // The previous char was a backslash. let mut escape = false; // State around parsing charsets. enum State { diff --git a/lib/filecheck/src/variable.rs b/lib/filecheck/src/variable.rs index 1a43f1428a..5977f06354 100644 --- a/lib/filecheck/src/variable.rs +++ b/lib/filecheck/src/variable.rs @@ -3,7 +3,7 @@ use std::borrow::Cow; /// A variable name is one or more ASCII alphanumerical characters, including underscore. /// Note that numerical variable names like `$45` are allowed too. /// -/// Try to parse a variable name from the begining of `s`. +/// Try to parse a variable name from the beginning of `s`. 
/// Return the index of the character following the varname. /// This returns 0 if `s` doesn't have a prefix that is a variable name. pub fn varname_prefix(s: &str) -> usize { diff --git a/lib/filecheck/tests/basic.rs b/lib/filecheck/tests/basic.rs index debced2a1b..1ccc5991a5 100644 --- a/lib/filecheck/tests/basic.rs +++ b/lib/filecheck/tests/basic.rs @@ -153,7 +153,7 @@ fn nextln() { #[test] fn leading_nextln() { // A leading nextln directive should match from line 2. - // This is somewhat arbitrary, but consistent with a preceeding 'check: $()' directive. + // This is somewhat arbitrary, but consistent with a preceding 'check: $()' directive. let c = CheckerBuilder::new() .text( " diff --git a/lib/frontend/Cargo.toml b/lib/frontend/Cargo.toml index bccc84f6bc..cdbabdda79 100644 --- a/lib/frontend/Cargo.toml +++ b/lib/frontend/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cretonne Project Developers"] name = "cretonne-frontend" -version = "0.1.0" +version = "0.3.4" description = "Cretonne IL builder helper" license = "Apache-2.0" documentation = "https://cretonne.readthedocs.io/" @@ -12,7 +12,7 @@ readme = "README.md" name = "cton_frontend" [dependencies] -cretonne = { path = "../cretonne", version = "0.1.0", default-features = false } +cretonne = { path = "../cretonne", version = "0.3.4", default-features = false } [features] default = ["std"] diff --git a/lib/frontend/src/frontend.rs b/lib/frontend/src/frontend.rs index 432288e4eb..6802884b6a 100644 --- a/lib/frontend/src/frontend.rs +++ b/lib/frontend/src/frontend.rs @@ -12,9 +12,13 @@ use cretonne::packed_option::PackedOption; /// Structure used for translating a series of functions into Cretonne IL. 
/// -/// In order to reduce memory reallocations whem compiling multiple functions, +/// In order to reduce memory reallocations when compiling multiple functions, /// `ILBuilder` holds various data structures which are cleared between /// functions, rather than dropped, preserving the underlying allocations. +/// +/// The `Variable` parameter can be any index-like type that can be made to +/// implement `EntityRef`. For frontends that don't have an obvious type to +/// use here, `variable::Variable` can be used. pub struct ILBuilder where Variable: EntityRef, @@ -259,7 +263,7 @@ where /// block, in the order they are declared. You must declare the types of the Ebb arguments /// you will use here. /// - /// When inserting the terminator instruction (which doesn't have a falltrough to its immediate + /// When inserting the terminator instruction (which doesn't have a fallthrough to its immediate /// successor), the block will be declared filled and it will not be possible to append /// instructions to it. pub fn switch_to_block(&mut self, ebb: Ebb) { @@ -283,7 +287,7 @@ where /// Declares that all the predecessors of this block are known. /// /// Function to call with `ebb` as soon as the last branch instruction to `ebb` has been - /// created. Forgetting to call this method on every block will cause inconsistences in the + /// created. Forgetting to call this method on every block will cause inconsistencies in the /// produced functions. pub fn seal_block(&mut self, ebb: Ebb) { let side_effects = self.builder.ssa.seal_ebb_header_block(ebb, self.func); @@ -590,22 +594,7 @@ mod tests { use frontend::{ILBuilder, FunctionBuilder}; use cretonne::verifier::verify_function; use cretonne::settings; - - use std::u32; - - // An opaque reference to variable. 
- #[derive(Copy, Clone, PartialEq, Eq, Debug)] - pub struct Variable(u32); - impl EntityRef for Variable { - fn new(index: usize) -> Self { - assert!(index < (u32::MAX as usize)); - Variable(index as u32) - } - - fn index(self) -> usize { - self.0 as usize - } - } + use Variable; fn sample_function(lazy_seal: bool) { let mut sig = Signature::new(CallConv::Native); @@ -620,9 +609,9 @@ mod tests { let block0 = builder.create_ebb(); let block1 = builder.create_ebb(); let block2 = builder.create_ebb(); - let x = Variable(0); - let y = Variable(1); - let z = Variable(2); + let x = Variable::new(0); + let y = Variable::new(1); + let z = Variable::new(2); builder.declare_var(x, I32); builder.declare_var(y, I32); builder.declare_var(z, I32); diff --git a/lib/frontend/src/lib.rs b/lib/frontend/src/lib.rs index 52765b5a20..a44214cf14 100644 --- a/lib/frontend/src/lib.rs +++ b/lib/frontend/src/lib.rs @@ -39,23 +39,8 @@ //! use cretonne::ir::{ExternalName, CallConv, Function, Signature, AbiParam, InstBuilder}; //! use cretonne::ir::types::*; //! use cretonne::settings; -//! use cton_frontend::{ILBuilder, FunctionBuilder}; +//! use cton_frontend::{ILBuilder, FunctionBuilder, Variable}; //! use cretonne::verifier::verify_function; -//! use std::u32; -//! -//! // An opaque reference to variable. -//! #[derive(Copy, Clone, PartialEq, Eq, Debug)] -//! pub struct Variable(u32); -//! impl EntityRef for Variable { -//! fn new(index: usize) -> Self { -//! assert!(index < (u32::MAX as usize)); -//! Variable(index as u32) -//! } -//! -//! fn index(self) -> usize { -//! self.0 as usize -//! } -//! } //! //! fn main() { //! let mut sig = Signature::new(CallConv::Native); @@ -69,9 +54,9 @@ //! let block0 = builder.create_ebb(); //! let block1 = builder.create_ebb(); //! let block2 = builder.create_ebb(); -//! let x = Variable(0); -//! let y = Variable(1); -//! let z = Variable(2); +//! let x = Variable::new(0); +//! let y = Variable::new(1); +//! let z = Variable::new(2); //! 
builder.declare_var(x, I32); //! builder.declare_var(y, I32); //! builder.declare_var(z, I32); @@ -142,7 +127,9 @@ //! } //! ``` -#![deny(missing_docs)] +#![deny(missing_docs, + trivial_numeric_casts, + unused_extern_crates)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(not(feature = "std"), feature(alloc))] @@ -153,9 +140,11 @@ extern crate cretonne; extern crate alloc; pub use frontend::{ILBuilder, FunctionBuilder}; +pub use variable::Variable; mod frontend; mod ssa; +mod variable; #[cfg(not(feature = "std"))] mod std { diff --git a/lib/frontend/src/ssa.rs b/lib/frontend/src/ssa.rs index fc2ebcca08..cf44371983 100644 --- a/lib/frontend/src/ssa.rs +++ b/lib/frontend/src/ssa.rs @@ -36,7 +36,7 @@ pub struct SSABuilder where Variable: EntityRef, { - // Records for every variable and for every revelant block, the last definition of + // Records for every variable and for every relevant block, the last definition of // the variable in the block. variables: EntityMap>>, // Records the position of the basic blocks and the list of values used but not defined in the @@ -82,7 +82,7 @@ enum BlockData { // A block at the top of an `Ebb`. EbbHeader(EbbHeaderBlockData), // A block inside an `Ebb` with an unique other block as its predecessor. - // The block is implicitely sealed at creation. + // The block is implicitly sealed at creation. EbbBody { predecessor: Block }, } @@ -395,7 +395,7 @@ where /// Remove a previously declared Ebb predecessor by giving a reference to the jump /// instruction. Returns the basic block containing the instruction. 
/// - /// Note: use only when you know what you are doing, this might break the SSA bbuilding problem + /// Note: use only when you know what you are doing, this might break the SSA building problem pub fn remove_ebb_predecessor(&mut self, ebb: Ebb, inst: Inst) -> Block { debug_assert!(!self.is_sealed(ebb)); let header_block = self.header_block(ebb); @@ -423,7 +423,7 @@ pub fn seal_all_ebb_header_blocks(&mut self, func: &mut Function) -> SideEffects { // Seal all `Ebb`s currently in the function. This can entail splitting // and creation of new blocks, however such new blocks are sealed on - // the fly, so we don't need to accout for them here. + // the fly, so we don't need to account for them here. for ebb in self.ebb_headers.keys() { self.seal_one_ebb_header_block(ebb, func); } @@ -471,8 +471,8 @@ } } - /// Look up in the predecessors of an Ebb the def for a value an decides wether or not - /// to keep the eeb arg, and act accordingly. Returns the chosen value and optionnaly a + /// Look up in the predecessors of an Ebb the def for a value and decides whether or not + /// to keep the ebb arg, and act accordingly. Returns the chosen value and optionally a /// list of Ebb that are the middle of newly created critical edges splits. fn predecessors_lookup( &mut self, @@ -555,7 +555,7 @@ ZeroOneOrMore::One(pred_val) => { // Here all the predecessors use a single value to represent our variable // so we don't need to have it as an ebb argument. - // We need to replace all the occurences of val with pred_val but since + // We need to replace all the occurrences of val with pred_val but since // we can't afford a re-writing pass right now we just declare an alias. // Resolve aliases eagerly so that we can check for cyclic aliasing, // which can occur in unreachable code.
@@ -621,7 +621,7 @@ where val: Value, var: Variable, ) -> Option<(Ebb, Block, Inst)> { - match func.dfg[jump_inst].analyze_branch(&func.dfg.value_lists) { + match func.dfg.analyze_branch(jump_inst) { BranchInfo::NotABranch => { panic!("you have declared a non-branch instruction as a predecessor to an ebb"); } @@ -721,21 +721,7 @@ mod tests { use cretonne::ir::instructions::BranchInfo; use cretonne::settings; use ssa::SSABuilder; - use std::u32; - - /// An opaque reference to variable. - #[derive(Copy, Clone, PartialEq, Eq, Debug)] - pub struct Variable(u32); - impl EntityRef for Variable { - fn new(index: usize) -> Self { - assert!(index < (u32::MAX as usize)); - Variable(index as u32) - } - - fn index(self) -> usize { - self.0 as usize - } - } + use Variable; #[test] fn simple_block() { @@ -749,14 +735,14 @@ mod tests { // z = x + z; let block = ssa.declare_ebb_header_block(ebb0); - let x_var = Variable(0); + let x_var = Variable::new(0); let x_ssa = { let mut cur = FuncCursor::new(&mut func); cur.insert_ebb(ebb0); cur.ins().iconst(I32, 1) }; ssa.def_var(x_var, x_ssa, block); - let y_var = Variable(1); + let y_var = Variable::new(1); let y_ssa = { let mut cur = FuncCursor::new(&mut func).at_bottom(ebb0); cur.ins().iconst(I32, 2) @@ -765,7 +751,7 @@ mod tests { assert_eq!(ssa.use_var(&mut func, x_var, I32, block).0, x_ssa); assert_eq!(ssa.use_var(&mut func, y_var, I32, block).0, y_ssa); - let z_var = Variable(2); + let z_var = Variable::new(2); let x_use1 = ssa.use_var(&mut func, x_var, I32, block).0; let y_use1 = ssa.use_var(&mut func, y_var, I32, block).0; let z1_ssa = { @@ -801,7 +787,7 @@ mod tests { // y = x + y; let block0 = ssa.declare_ebb_header_block(ebb0); - let x_var = Variable(0); + let x_var = Variable::new(0); let x_ssa = { let mut cur = FuncCursor::new(&mut func); cur.insert_ebb(ebb0); @@ -810,7 +796,7 @@ mod tests { cur.ins().iconst(I32, 1) }; ssa.def_var(x_var, x_ssa, block0); - let y_var = Variable(1); + let y_var = Variable::new(1); let y_ssa = { 
let mut cur = FuncCursor::new(&mut func).at_bottom(ebb0); cur.ins().iconst(I32, 2) @@ -818,7 +804,7 @@ mod tests { ssa.def_var(y_var, y_ssa, block0); assert_eq!(ssa.use_var(&mut func, x_var, I32, block0).0, x_ssa); assert_eq!(ssa.use_var(&mut func, y_var, I32, block0).0, y_ssa); - let z_var = Variable(2); + let z_var = Variable::new(2); let x_use1 = ssa.use_var(&mut func, x_var, I32, block0).0; let y_use1 = ssa.use_var(&mut func, y_var, I32, block0).0; let z1_ssa = { @@ -856,7 +842,7 @@ mod tests { cur.ins().iadd(x_use3, y_use3) }; ssa.def_var(y_var, y2_ssa, block2); - match func.dfg[jump_inst].analyze_branch(&func.dfg.value_lists) { + match func.dfg.analyze_branch(jump_inst) { BranchInfo::SingleDest(dest, jump_args) => { assert_eq!(dest, ebb1); assert_eq!(jump_args.len(), 0); @@ -889,7 +875,7 @@ mod tests { let block0 = ssa.declare_ebb_header_block(ebb0); ssa.seal_ebb_header_block(ebb0, &mut func); - let x_var = Variable(0); + let x_var = Variable::new(0); let x1 = { let mut cur = FuncCursor::new(&mut func); cur.insert_ebb(ebb0); @@ -900,14 +886,14 @@ mod tests { }; ssa.def_var(x_var, x1, block0); assert_eq!(ssa.use_var(&mut func, x_var, I32, block0).0, x1); - let y_var = Variable(1); + let y_var = Variable::new(1); let y1 = { let mut cur = FuncCursor::new(&mut func).at_bottom(ebb0); cur.ins().iconst(I32, 2) }; ssa.def_var(y_var, y1, block0); assert_eq!(ssa.use_var(&mut func, y_var, I32, block0).0, y1); - let z_var = Variable(2); + let z_var = Variable::new(2); let x2 = ssa.use_var(&mut func, x_var, I32, block0).0; assert_eq!(x2, x1); let y2 = ssa.use_var(&mut func, y_var, I32, block0).0; @@ -996,7 +982,7 @@ mod tests { // let block0 = ssa.declare_ebb_header_block(ebb0); ssa.seal_ebb_header_block(ebb0, &mut func); - let x_var = Variable(0); + let x_var = Variable::new(0); let x1 = { let mut cur = FuncCursor::new(&mut func); cur.insert_ebb(ebb0); @@ -1066,9 +1052,9 @@ mod tests { // jump ebb1 // let block0 = ssa.declare_ebb_header_block(ebb0); - let x_var = 
Variable(0); - let y_var = Variable(1); - let z_var = Variable(2); + let x_var = Variable::new(0); + let y_var = Variable::new(1); + let z_var = Variable::new(2); ssa.seal_ebb_header_block(ebb0, &mut func); let x1 = { let mut cur = FuncCursor::new(&mut func); @@ -1125,17 +1111,17 @@ mod tests { #[test] fn undef() { - // Use vars of varous types which have not been defined. + // Use vars of various types which have not been defined. let mut func = Function::new(); let mut ssa: SSABuilder = SSABuilder::new(); let ebb0 = func.dfg.make_ebb(); let block = ssa.declare_ebb_header_block(ebb0); ssa.seal_ebb_header_block(ebb0, &mut func); - let i32_var = Variable(0); - let f32_var = Variable(1); - let f64_var = Variable(2); - let b1_var = Variable(3); - let f32x4_var = Variable(4); + let i32_var = Variable::new(0); + let f32_var = Variable::new(1); + let f64_var = Variable::new(2); + let b1_var = Variable::new(3); + let f32x4_var = Variable::new(4); ssa.use_var(&mut func, i32_var, I32, block); ssa.use_var(&mut func, f32_var, F32, block); ssa.use_var(&mut func, f64_var, F64, block); @@ -1153,7 +1139,7 @@ mod tests { let ebb0 = func.dfg.make_ebb(); let block = ssa.declare_ebb_header_block(ebb0); ssa.seal_ebb_header_block(ebb0, &mut func); - let x_var = Variable(0); + let x_var = Variable::new(0); assert_eq!(func.dfg.num_ebb_params(ebb0), 0); ssa.use_var(&mut func, x_var, I32, block); assert_eq!(func.dfg.num_ebb_params(ebb0), 0); @@ -1172,7 +1158,7 @@ mod tests { let mut ssa: SSABuilder = SSABuilder::new(); let ebb0 = func.dfg.make_ebb(); let block = ssa.declare_ebb_header_block(ebb0); - let x_var = Variable(0); + let x_var = Variable::new(0); assert_eq!(func.dfg.num_ebb_params(ebb0), 0); ssa.use_var(&mut func, x_var, I32, block); assert_eq!(func.dfg.num_ebb_params(ebb0), 1); @@ -1206,7 +1192,7 @@ mod tests { cur.insert_ebb(ebb1); cur.goto_bottom(ebb0); cur.ins().return_(&[]); - let x_var = Variable(0); + let x_var = Variable::new(0); cur.goto_bottom(ebb1); let val = 
ssa.use_var(&mut cur.func, x_var, I32, block1).0; let brz = cur.ins().brz(val, ebb1, &[]); @@ -1248,7 +1234,7 @@ let block2 = ssa.declare_ebb_header_block(ebb2); { let mut cur = FuncCursor::new(&mut func); - let x_var = Variable(0); + let x_var = Variable::new(0); cur.insert_ebb(ebb0); cur.insert_ebb(ebb1); cur.insert_ebb(ebb2); diff --git a/lib/frontend/src/variable.rs b/lib/frontend/src/variable.rs new file mode 100644 index 0000000000..b69a63afeb --- /dev/null +++ b/lib/frontend/src/variable.rs @@ -0,0 +1,32 @@ +//! A basic `Variable` implementation. +//! +//! `ILBuilder`, `FunctionBuilder`, and related types have a `Variable` +//! type parameter, to allow frontends that identify variables with +//! their own index types to use them directly. Frontends which don't +//! can use the `Variable` defined here. + +use cretonne::entity::EntityRef; +use std::u32; + +/// An opaque reference to a variable. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct Variable(u32); + +impl Variable { + /// Create a new Variable with the given index.
+ pub fn with_u32(index: u32) -> Self { + debug_assert!(index < u32::MAX); + Variable(index) + } +} + +impl EntityRef for Variable { + fn new(index: usize) -> Self { + debug_assert!(index < (u32::MAX as usize)); + Variable(index as u32) + } + + fn index(self) -> usize { + self.0 as usize + } +} diff --git a/lib/native/Cargo.toml b/lib/native/Cargo.toml index 32417e8718..e89b0f02fe 100644 --- a/lib/native/Cargo.toml +++ b/lib/native/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cretonne-native" -version = "0.1.0" +version = "0.3.4" authors = ["The Cretonne Project Developers"] description = "Support for targeting the host with Cretonne" repository = "https://github.com/Cretonne/cretonne" @@ -11,7 +11,7 @@ readme = "README.md" name = "cton_native" [dependencies] -cretonne = { path = "../cretonne", version = "0.1.0", default-features = false } +cretonne = { path = "../cretonne", version = "0.3.4", default-features = false } [target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies] raw-cpuid = "3.0.0" diff --git a/lib/native/src/lib.rs b/lib/native/src/lib.rs index 514e6df314..66c4cdec4a 100644 --- a/lib/native/src/lib.rs +++ b/lib/native/src/lib.rs @@ -1,6 +1,9 @@ //! Performs autodetection of the host for the purposes of running //! Cretonne to generate code to run on the same machine. -#![deny(missing_docs)] + +#![deny(missing_docs, + trivial_numeric_casts, + unused_extern_crates)] #![cfg_attr(not(feature = "std"), no_std)] @@ -18,7 +21,7 @@ use raw_cpuid::CpuId; /// Return `settings` and `isa` builders configured for the current host /// machine, or `Err(())` if the host machine is not supported /// in the current configuration. -pub fn builders() -> Result<(settings::Builder, isa::Builder), ()> { +pub fn builders() -> Result<(settings::Builder, isa::Builder), &'static str> { let mut flag_builder = settings::builder(); // TODO: Add RISC-V support once Rust supports it. 
@@ -34,28 +37,28 @@ pub fn builders() -> Result<(settings::Builder, isa::Builder), ()> { } else if cfg!(target_arch = "aarch64") { "arm64" } else { - return Err(()); + return Err("unrecognized architecture"); }; let mut isa_builder = isa::lookup(name).map_err(|err| match err { isa::LookupError::Unknown => panic!(), - isa::LookupError::Unsupported => (), + isa::LookupError::Unsupported => "unsupported architecture", })?; if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - parse_x86_cpuid(&mut isa_builder); + parse_x86_cpuid(&mut isa_builder)?; } Ok((flag_builder, isa_builder)) } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -fn parse_x86_cpuid(isa_builder: &mut isa::Builder) { +fn parse_x86_cpuid(isa_builder: &mut isa::Builder) -> Result<(), &'static str> { let cpuid = CpuId::new(); if let Some(info) = cpuid.get_feature_info() { - if info.has_sse2() { - isa_builder.enable("has_sse2").unwrap(); + if !info.has_sse2() { + return Err("x86 support requires SSE2"); } if info.has_sse3() { isa_builder.enable("has_sse3").unwrap(); @@ -86,4 +89,5 @@ fn parse_x86_cpuid(isa_builder: &mut isa::Builder) { isa_builder.enable("has_lzcnt").unwrap(); } } + Ok(()) } diff --git a/lib/reader/Cargo.toml b/lib/reader/Cargo.toml index 636f6b40e0..0d2061e361 100644 --- a/lib/reader/Cargo.toml +++ b/lib/reader/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cretonne Project Developers"] name = "cretonne-reader" -version = "0.1.0" +version = "0.3.4" description = "Cretonne textual IL reader" license = "Apache-2.0" documentation = "https://cretonne.readthedocs.io/" @@ -12,4 +12,4 @@ readme = "README.md" name = "cton_reader" [dependencies] -cretonne = { path = "../cretonne", version = "0.1.0" } +cretonne = { path = "../cretonne", version = "0.3.4" } diff --git a/lib/reader/src/lib.rs b/lib/reader/src/lib.rs index 06a03dd6fe..85ea048950 100644 --- a/lib/reader/src/lib.rs +++ b/lib/reader/src/lib.rs @@ -3,7 +3,9 @@ //! 
The cton_reader library supports reading .cton files. This functionality is needed for testing //! Cretonne, but is not essential for a JIT compiler. -#![deny(missing_docs)] +#![deny(missing_docs, + trivial_numeric_casts, + unused_extern_crates)] extern crate cretonne; diff --git a/lib/reader/src/parser.rs b/lib/reader/src/parser.rs index 43c149956a..a9568942ed 100644 --- a/lib/reader/src/parser.rs +++ b/lib/reader/src/parser.rs @@ -82,9 +82,6 @@ pub struct Parser<'a> { } // Context for resolving references when parsing a single function. -// -// Many entities like values, stack slots, and function signatures are referenced in the `.cton` -// file by number. We need to map these numbers to real references. struct Context<'a> { function: Function, map: SourceMap, @@ -119,7 +116,7 @@ impl<'a> Context<'a> { } } - // Allocate a new stack slot and add a mapping number -> StackSlot. + // Allocate a new stack slot. fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: &Location) -> Result<()> { while self.function.stack_slots.next_key().index() <= ss.index() { self.function.create_stack_slot( @@ -139,7 +136,7 @@ impl<'a> Context<'a> { } } - // Allocate a global variable slot and add a mapping number -> GlobalVar. + // Allocate a global variable slot. fn add_gv(&mut self, gv: GlobalVar, data: GlobalVarData, loc: &Location) -> Result<()> { while self.function.global_vars.next_key().index() <= gv.index() { self.function.create_global_var(GlobalVarData::Sym { @@ -159,7 +156,7 @@ impl<'a> Context<'a> { } } - // Allocate a heap slot and add a mapping number -> Heap. + // Allocate a heap slot. fn add_heap(&mut self, heap: Heap, data: HeapData, loc: &Location) -> Result<()> { while self.function.heaps.next_key().index() <= heap.index() { self.function.create_heap(HeapData { @@ -182,7 +179,7 @@ impl<'a> Context<'a> { } } - // Allocate a new signature and add a mapping number -> SigRef. + // Allocate a new signature. 
fn add_sig(&mut self, sig: SigRef, data: Signature, loc: &Location) -> Result<()> { while self.function.dfg.signatures.next_key().index() <= sig.index() { self.function.import_signature( @@ -202,7 +199,7 @@ impl<'a> Context<'a> { } } - // Allocate a new external function and add a mapping number -> FuncRef. + // Allocate a new external function. fn add_fn(&mut self, fn_: FuncRef, data: ExtFuncData, loc: &Location) -> Result<()> { while self.function.dfg.ext_funcs.next_key().index() <= fn_.index() { self.function.import_function(ExtFuncData { @@ -223,7 +220,7 @@ impl<'a> Context<'a> { } } - // Allocate a new jump table and add a mapping number -> JumpTable. + // Allocate a new jump table. fn add_jt(&mut self, jt: JumpTable, data: JumpTableData, loc: &Location) -> Result<()> { while self.function.jump_tables.next_key().index() <= jt.index() { self.function.create_jump_table(JumpTableData::new()); @@ -241,7 +238,7 @@ impl<'a> Context<'a> { } } - // Allocate a new EBB and add a mapping src_ebb -> Ebb. + // Allocate a new EBB. fn add_ebb(&mut self, ebb: Ebb, loc: &Location) -> Result { while self.function.dfg.num_ebbs() <= ebb.index() { self.function.dfg.make_ebb(); @@ -446,7 +443,6 @@ impl<'a> Parser<'a> { } // Match and consume a value reference, direct or vtable. - // This does not convert from the source value numbering to our in-memory value numbering. fn match_value(&mut self, err_msg: &str) -> Result { if let Some(Token::Value(v)) = self.token() { self.consume(); @@ -1048,7 +1044,7 @@ impl<'a> Parser<'a> { // Parse a stack slot decl. // // stack-slot-decl ::= * StackSlot(ss) "=" stack-slot-kind Bytes {"," stack-slot-flag} - // stack-slot-kind ::= "local" + // stack-slot-kind ::= "explicit_slot" // | "spill_slot" // | "incoming_arg" // | "outgoing_arg" @@ -1074,7 +1070,7 @@ impl<'a> Parser<'a> { // Take additional options. while self.optional(Token::Comma) { match self.match_any_identifier("expected stack slot flags")? 
{ - "offset" => data.offset = self.match_imm32("expected byte offset")?, + "offset" => data.offset = Some(self.match_imm32("expected byte offset")?), other => return err!(self.loc, "Unknown stack slot flag '{}'", other), } } @@ -1427,7 +1423,7 @@ impl<'a> Parser<'a> { } // Parse parenthesized list of EBB parameters. Returns a vector of (u32, Type) pairs with the - // source value numbers of the defined values and the defined types. + // value numbers of the defined values and the defined types. // // ebb-params ::= * "(" ebb-param { "," ebb-param } ")" fn parse_ebb_params(&mut self, ctx: &mut Context, ebb: Ebb) -> Result<()> { @@ -1476,7 +1472,7 @@ impl<'a> Parser<'a> { } let t = self.match_type("expected EBB argument type")?; - // Allocate the EBB argument and add the mapping. + // Allocate the EBB argument. ctx.function.dfg.append_ebb_param_for_parser(ebb, t, v); ctx.map.def_value(v, &v_location)?; @@ -1632,6 +1628,11 @@ impl<'a> Parser<'a> { ctx: &mut Context, ebb: Ebb, ) -> Result<()> { + // Define the result values. + for val in &results { + ctx.map.def_value(*val, &self.loc)?; + } + // Collect comments for the next instruction. self.start_gathering_comments(); @@ -1709,11 +1710,6 @@ impl<'a> Parser<'a> { } } - // Now map the source result values to the just created instruction results. - // Pass a reference to `ctx.values` instead of `ctx` itself since the `Values` iterator - // holds a reference to `ctx.function`. - self.add_values(&mut ctx.map, results.into_iter())?; - if let Some(result_locations) = result_locations { for (&value, loc) in ctx.function.dfg.inst_results(inst).iter().zip( result_locations, @@ -1798,17 +1794,6 @@ impl<'a> Parser<'a> { Ok(ctrl_type) } - // Add mappings for a list of source values to their corresponding new values. 
- fn add_values(&self, map: &mut SourceMap, new_results: V) -> Result<()> - where - V: Iterator, - { - for val in new_results { - map.def_value(val, &self.loc)?; - } - Ok(()) - } - // Parse comma-separated value list into a VariableArgs struct. // // value_list ::= [ value { "," value } ] @@ -1947,7 +1932,7 @@ impl<'a> Parser<'a> { } InstructionFormat::NullAry => InstructionData::NullAry { opcode }, InstructionFormat::Jump => { - // Parse the destination EBB number. Don't translate source to local numbers yet. + // Parse the destination EBB number. let ebb_num = self.match_ebb("expected jump destination EBB")?; let args = self.parse_opt_value_list()?; InstructionData::Jump { diff --git a/lib/reader/src/testcommand.rs b/lib/reader/src/testcommand.rs index ad856e9949..2b848a9348 100644 --- a/lib/reader/src/testcommand.rs +++ b/lib/reader/src/testcommand.rs @@ -9,7 +9,7 @@ //! //! The options are either a single identifier flag, or setting values like `identifier=value`. //! -//! The parser does not understand the test commands or which options are alid. It simply parses +//! The parser does not understand the test commands or which options are valid. It simply parses //! the general format into a `TestCommand` data structure. use std::fmt::{self, Display, Formatter}; diff --git a/lib/reader/src/testfile.rs b/lib/reader/src/testfile.rs index 20e03939a1..8ec5271a07 100644 --- a/lib/reader/src/testfile.rs +++ b/lib/reader/src/testfile.rs @@ -42,7 +42,7 @@ pub struct Details<'a> { /// A comment in a parsed function. /// -/// The comment belongs to the immediately preceeding entity, whether that is an EBB header, and +/// The comment belongs to the immediately preceding entity, whether that is an EBB header, and /// instruction, or one of the preamble declarations. 
/// /// Comments appearing inside the function but before the preamble, as well as comments appearing diff --git a/lib/wasm/Cargo.toml b/lib/wasm/Cargo.toml index 7954ebc209..3f35ee5c96 100644 --- a/lib/wasm/Cargo.toml +++ b/lib/wasm/Cargo.toml @@ -1,19 +1,20 @@ [package] name = "cretonne-wasm" -version = "0.1.0" +version = "0.3.4" authors = ["The Cretonne Project Developers"] description = "Translator from WebAssembly to Cretonne IL" repository = "https://github.com/Cretonne/cretonne" license = "Apache-2.0" readme = "README.md" +keywords = [ "webassembly", "wasm" ] [lib] name = "cton_wasm" [dependencies] wasmparser = "0.14.1" -cretonne = { path = "../cretonne", version = "0.1.0", default_features = false } -cretonne-frontend = { path = "../frontend", version = "0.1.0", default_features = false } +cretonne = { path = "../cretonne", version = "0.3.4", default_features = false } +cretonne-frontend = { path = "../frontend", version = "0.3.4", default_features = false } [dependencies.hashmap_core] version = "0.1.1" diff --git a/lib/wasm/src/code_translator.rs b/lib/wasm/src/code_translator.rs index c62e1fe76a..c9b8732989 100644 --- a/lib/wasm/src/code_translator.rs +++ b/lib/wasm/src/code_translator.rs @@ -25,9 +25,10 @@ use cretonne::ir::{self, InstBuilder, MemFlags, JumpTableData}; use cretonne::ir::types::*; use cretonne::ir::condcodes::{IntCC, FloatCC}; -use cton_frontend::FunctionBuilder; +use cretonne::packed_option::ReservedValue; +use cton_frontend::{FunctionBuilder, Variable}; use wasmparser::{Operator, MemoryImmediate}; -use translation_utils::{f32_translation, f64_translation, type_to_type, num_return_values, Local}; +use translation_utils::{f32_translation, f64_translation, type_to_type, num_return_values}; use translation_utils::{TableIndex, SignatureIndex, FunctionIndex, MemoryIndex}; use state::{TranslationState, ControlStackFrame}; use std::collections::{HashMap, hash_map}; @@ -38,29 +39,31 @@ use std::vec::Vec; /// Translates wasm operators into 
Cretonne IL instructions. Returns `true` if it inserted /// a return. pub fn translate_operator( - op: &Operator, - builder: &mut FunctionBuilder, + op: Operator, + builder: &mut FunctionBuilder, state: &mut TranslationState, environ: &mut FE, ) { - if state.in_unreachable_code() { + if !state.reachable { return translate_unreachable_operator(op, builder, state); } // This big match treats all Wasm code operators. - match *op { + match op { /********************************** Locals **************************************** * `get_local` and `set_local` are treated as non-SSA variables and will completely - * diseappear in the Cretonne Code + * disappear in the Cretonne Code ***********************************************************************************/ - Operator::GetLocal { local_index } => state.push1(builder.use_var(Local(local_index))), + Operator::GetLocal { local_index } => { + state.push1(builder.use_var(Variable::with_u32(local_index))) + } Operator::SetLocal { local_index } => { let val = state.pop1(); - builder.def_var(Local(local_index), val); + builder.def_var(Variable::with_u32(local_index), val); } Operator::TeeLocal { local_index } => { let val = state.peek1(); - builder.def_var(Local(local_index), val); + builder.def_var(Variable::with_u32(local_index), val); } /********************************** Globals **************************************** * `get_global` and `set_global` are handled by the environment. @@ -90,7 +93,7 @@ pub fn translate_operator( } } /********************************* Stack misc *************************************** - * `drop`, `nop`, `unreachable` and `select`. + * `drop`, `nop`, `unreachable` and `select`. ***********************************************************************************/ Operator::Drop => { state.pop1(); @@ -106,7 +109,7 @@ pub fn translate_operator( // We use `trap user0` to indicate a user-generated trap. // We could make the trap code configurable if need be. 
builder.ins().trap(ir::TrapCode::User(0)); - state.real_unreachable_stack_depth = 1; + state.reachable = false; } /***************************** Control flow blocks ********************************** * When starting a control flow block, we create a new `Ebb` that will hold the code @@ -156,15 +159,24 @@ pub fn translate_operator( // and push a new control frame with a new ebb for the code after the if/then/else // At the end of the then clause we jump to the destination let i = state.control_stack.len() - 1; - let (destination, return_count, branch_inst) = match state.control_stack[i] { - ControlStackFrame::If { - destination, - num_return_values, - branch_inst, - .. - } => (destination, num_return_values, branch_inst), - _ => panic!("should not happen"), - }; + let (destination, return_count, branch_inst, ref mut reachable_from_top) = + match state.control_stack[i] { + ControlStackFrame::If { + destination, + num_return_values, + branch_inst, + reachable_from_top, + .. + } => ( + destination, + num_return_values, + branch_inst, + reachable_from_top, + ), + _ => panic!("should not happen"), + }; + // The if has an else, so there's no branch to the end from the top. 
+ *reachable_from_top = false; builder.ins().jump(destination, state.peekn(return_count)); state.popn(return_count); // We change the target of the branch instruction @@ -220,7 +232,7 @@ pub fn translate_operator( let (return_count, br_destination) = { let frame = &mut state.control_stack[i]; // We signal that all the code that follows until the next End is unreachable - frame.set_reachable(); + frame.set_branched_to_exit(); let return_count = if frame.is_loop() { 0 } else { @@ -233,7 +245,7 @@ pub fn translate_operator( state.peekn(return_count), ); state.popn(return_count); - state.real_unreachable_stack_depth = 1 + relative_depth as usize; + state.reachable = false; } Operator::BrIf { relative_depth } => { let val = state.pop1(); @@ -242,7 +254,7 @@ pub fn translate_operator( let frame = &mut state.control_stack[i]; // The values returned by the branch are still available for the reachable // code that comes after it - frame.set_reachable(); + frame.set_branched_to_exit(); let return_count = if frame.is_loop() { 0 } else { @@ -256,7 +268,7 @@ pub fn translate_operator( state.peekn(return_count), ); } - Operator::BrTable { ref table } => { + Operator::BrTable { table } => { let (depths, default) = table.read_table(); let mut min_depth = default; for depth in &depths { @@ -273,31 +285,32 @@ pub fn translate_operator( min_depth_frame.num_return_values() } }; + let val = state.pop1(); + let mut data = JumpTableData::with_capacity(depths.len()); if jump_args_count == 0 { // No jump arguments - let val = state.pop1(); - let mut data = JumpTableData::with_capacity(depths.len()); for depth in depths { - let i = state.control_stack.len() - 1 - (depth as usize); - let frame = &mut state.control_stack[i]; - let ebb = frame.br_destination(); + let ebb = { + let i = state.control_stack.len() - 1 - (depth as usize); + let frame = &mut state.control_stack[i]; + frame.set_branched_to_exit(); + frame.br_destination() + }; data.push_entry(ebb); - frame.set_reachable(); } let jt = 
builder.create_jump_table(data); builder.ins().br_table(val, jt); - let i = state.control_stack.len() - 1 - (default as usize); - let frame = &mut state.control_stack[i]; - let ebb = frame.br_destination(); + let ebb = { + let i = state.control_stack.len() - 1 - (default as usize); + let frame = &mut state.control_stack[i]; + frame.set_branched_to_exit(); + frame.br_destination() + }; builder.ins().jump(ebb, &[]); - state.real_unreachable_stack_depth = 1 + min_depth as usize; - frame.set_reachable(); } else { // Here we have jump arguments, but Cretonne's br_table doesn't support them // We then proceed to split the edges going out of the br_table - let val = state.pop1(); let return_count = jump_args_count; - let mut data = JumpTableData::with_capacity(depths.len()); let mut dest_ebb_sequence = Vec::new(); let mut dest_ebb_map = HashMap::new(); for depth in depths { @@ -313,29 +326,32 @@ pub fn translate_operator( } let jt = builder.create_jump_table(data); builder.ins().br_table(val, jt); - let default_ebb = state.control_stack[state.control_stack.len() - 1 - - (default as usize)] - .br_destination(); + let default_ebb = { + let i = state.control_stack.len() - 1 - (default as usize); + let frame = &mut state.control_stack[i]; + frame.set_branched_to_exit(); + frame.br_destination() + }; builder.ins().jump(default_ebb, state.peekn(return_count)); for (depth, dest_ebb) in dest_ebb_sequence { builder.switch_to_block(dest_ebb); builder.seal_block(dest_ebb); - let i = state.control_stack.len() - 1 - depth; let real_dest_ebb = { + let i = state.control_stack.len() - 1 - depth; let frame = &mut state.control_stack[i]; - frame.set_reachable(); + frame.set_branched_to_exit(); frame.br_destination() }; builder.ins().jump(real_dest_ebb, state.peekn(return_count)); } state.popn(return_count); - state.real_unreachable_stack_depth = 1 + min_depth as usize; } + state.reachable = false; } Operator::Return => { let (return_count, br_destination) = { let frame = &mut 
state.control_stack[0]; - frame.set_reachable(); + frame.set_branched_to_exit(); let return_count = frame.num_return_values(); (return_count, frame.br_destination()) }; @@ -348,7 +364,7 @@ pub fn translate_operator( } } state.popn(return_count); - state.real_unreachable_stack_depth = 1; + state.reachable = false; } /************************************ Calls **************************************** * The call instructions pop off their arguments from the stack and append their @@ -619,12 +635,35 @@ pub fn translate_operator( let val = state.pop1(); state.push1(builder.ins().bitcast(I64, val)); } - Operator::I32Extend8S | - Operator::I32Extend16S | - Operator::I64Extend8S | - Operator::I64Extend16S | + Operator::I32Extend8S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I8, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I32, val)); + } + Operator::I32Extend16S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I16, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I32, val)); + } + Operator::I64Extend8S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I8, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + Operator::I64Extend16S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I16, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } Operator::I64Extend32S => { - panic!("proposed sign-extend operators not yet supported"); + let val = state.pop1(); + state.push1(builder.ins().ireduce(I32, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); } /****************************** Binary Operators ************************************/ Operator::I32Add | Operator::I64Add => { @@ -897,80 +936,78 @@ pub fn translate_operator( /// are dropped but special ones like `End` or `Else` signal the potential end of the unreachable /// portion so the translation state muts be 
updated accordingly. fn translate_unreachable_operator( - op: &Operator, - builder: &mut FunctionBuilder, + op: Operator, + builder: &mut FunctionBuilder, state: &mut TranslationState, ) { - let stack = &mut state.stack; - let control_stack = &mut state.control_stack; - - // We don't translate because the code is unreachable - // Nevertheless we have to record a phantom stack for this code - // to know when the unreachable code ends - match *op { - Operator::If { ty: _ } | + match op { + Operator::If { ty: _ } => { + // Push a placeholder control stack entry. The if isn't reachable, + // so we don't have any branches anywhere. + state.push_if(ir::Inst::reserved_value(), ir::Ebb::reserved_value(), 0); + } Operator::Loop { ty: _ } | Operator::Block { ty: _ } => { - state.phantom_unreachable_stack_depth += 1; - } - Operator::End => { - if state.phantom_unreachable_stack_depth > 0 { - state.phantom_unreachable_stack_depth -= 1; - } else { - // This End corresponds to a real control stack frame - // We switch to the destination block but we don't insert - // a jump instruction since the code is still unreachable - let frame = control_stack.pop().unwrap(); - - builder.switch_to_block(frame.following_code()); - builder.seal_block(frame.following_code()); - match frame { - // If it is a loop we also have to seal the body loop block - ControlStackFrame::Loop { header, .. } => builder.seal_block(header), - // If it is an if then the code after is reachable again - ControlStackFrame::If { .. 
} => { - state.real_unreachable_stack_depth = 1; - } - _ => {} - } - if frame.is_reachable() { - state.real_unreachable_stack_depth = 1; - } - // Now we have to split off the stack the values not used - // by unreachable code that hasn't been translated - stack.truncate(frame.original_stack_size()); - // And add the return values of the block but only if the next block is reachble - // (which corresponds to testing if the stack depth is 1) - if state.real_unreachable_stack_depth == 1 { - stack.extend_from_slice(builder.ebb_params(frame.following_code())); - } - state.real_unreachable_stack_depth -= 1; - } + state.push_block(ir::Ebb::reserved_value(), 0); } Operator::Else => { - if state.phantom_unreachable_stack_depth > 0 { - // This is part of a phantom if-then-else, we do nothing - } else { - // Encountering an real else means that the code in the else - // clause is reachable again - let (branch_inst, original_stack_size) = match control_stack[control_stack.len() - - 1] { - ControlStackFrame::If { - branch_inst, - original_stack_size, - .. - } => (branch_inst, original_stack_size), - _ => panic!("should not happen"), - }; - // We change the target of the branch instruction - let else_ebb = builder.create_ebb(); - builder.change_jump_destination(branch_inst, else_ebb); - builder.seal_block(else_ebb); - builder.switch_to_block(else_ebb); - // Now we have to split off the stack the values not used - // by unreachable code that hasn't been translated - stack.truncate(original_stack_size); - state.real_unreachable_stack_depth = 0; + let i = state.control_stack.len() - 1; + match state.control_stack[i] { + ControlStackFrame::If { + branch_inst, + ref mut reachable_from_top, + .. + } => { + if *reachable_from_top { + // We have a branch from the top of the if to the else. + state.reachable = true; + // And because there's an else, there can no longer be a + // branch from the top directly to the end. 
+ *reachable_from_top = false; + + // We change the target of the branch instruction + let else_ebb = builder.create_ebb(); + builder.change_jump_destination(branch_inst, else_ebb); + builder.seal_block(else_ebb); + builder.switch_to_block(else_ebb); + } + } + _ => {} + } + } + Operator::End => { + let stack = &mut state.stack; + let control_stack = &mut state.control_stack; + let frame = control_stack.pop().unwrap(); + + // Now we have to split off the stack the values not used + // by unreachable code that hasn't been translated + stack.truncate(frame.original_stack_size()); + + let reachable_anyway = match frame { + // If it is a loop we also have to seal the body loop block + ControlStackFrame::Loop { header, .. } => { + builder.seal_block(header); + // And loops can't have branches to the end. + false + } + ControlStackFrame::If { reachable_from_top, .. } => { + // A reachable if without an else has a branch from the top + // directly to the bottom. + reachable_from_top + } + // All other control constructs are already handled. 
+ _ => false, + }; + + if frame.exit_is_branched_to() || reachable_anyway { + builder.switch_to_block(frame.following_code()); + builder.seal_block(frame.following_code()); + + // And add the return values of the block but only if the next block is reachable + // (which corresponds to testing if the stack depth is 1) + stack.extend_from_slice(builder.ebb_params(frame.following_code())); + state.reachable = true; } } _ => { @@ -985,12 +1022,12 @@ fn get_heap_addr( addr32: ir::Value, offset: u32, addr_ty: ir::Type, - builder: &mut FunctionBuilder, + builder: &mut FunctionBuilder, ) -> (ir::Value, i32) { use std::cmp::min; let guard_size: i64 = builder.func.heaps[heap].guard_size.into(); - assert!(guard_size > 0, "Heap guard pages currently required"); + debug_assert!(guard_size > 0, "Heap guard pages currently required"); // Generate `heap_addr` instructions that are friendly to CSE by checking offsets that are // multiples of the guard size. Add one to make sure that we check the pointer itself is in @@ -1021,7 +1058,7 @@ fn translate_load( offset: u32, opcode: ir::Opcode, result_ty: ir::Type, - builder: &mut FunctionBuilder, + builder: &mut FunctionBuilder, state: &mut TranslationState, environ: &mut FE, ) { @@ -1044,7 +1081,7 @@ fn translate_load( fn translate_store( offset: u32, opcode: ir::Opcode, - builder: &mut FunctionBuilder, + builder: &mut FunctionBuilder, state: &mut TranslationState, environ: &mut FE, ) { diff --git a/lib/wasm/src/environ/spec.rs b/lib/wasm/src/environ/spec.rs index 7872bdb0ea..73c1d3ff7f 100644 --- a/lib/wasm/src/environ/spec.rs +++ b/lib/wasm/src/environ/spec.rs @@ -148,7 +148,7 @@ pub trait FuncEnvironment { ) -> ir::Value; } -/// An object satisfyng the `ModuleEnvironment` trait can be passed as argument to the +/// An object satisfying the `ModuleEnvironment` trait can be passed as argument to the /// [`translate_module`](fn.translate_module.html) function. 
These methods should not be called /// by the user, they are only for `cretonne-wasm` internal use. pub trait ModuleEnvironment<'data> { diff --git a/lib/wasm/src/func_translator.rs b/lib/wasm/src/func_translator.rs index 5e8555c194..cef629c7dc 100644 --- a/lib/wasm/src/func_translator.rs +++ b/lib/wasm/src/func_translator.rs @@ -9,10 +9,9 @@ use cretonne::entity::EntityRef; use cretonne::ir::{self, InstBuilder, Ebb}; use cretonne::result::{CtonResult, CtonError}; use cretonne::timing; -use cton_frontend::{ILBuilder, FunctionBuilder}; +use cton_frontend::{ILBuilder, FunctionBuilder, Variable}; use environ::FuncEnvironment; use state::TranslationState; -use translation_utils::Local; use wasmparser::{self, BinaryReader}; /// WebAssembly to Cretonne IL function translator. @@ -21,7 +20,7 @@ use wasmparser::{self, BinaryReader}; /// by a `FuncEnvironment` object. A single translator instance can be reused to translate multiple /// functions which will reduce heap allocation traffic. pub struct FuncTranslator { - il_builder: ILBuilder, + il_builder: ILBuilder, state: TranslationState, } @@ -45,7 +44,7 @@ impl FuncTranslator { /// /// See [the WebAssembly specification][wasm]. /// - /// [wasm]: http://webassembly.github.io/spec/binary/modules.html#code-section + /// [wasm]: https://webassembly.github.io/spec/binary/modules.html#code-section /// /// The Cretonne IR function `func` should be completely empty except for the `func.signature` /// and `func.name` fields. The signature may contain special-purpose arguments which are not @@ -75,8 +74,8 @@ impl FuncTranslator { func.name, func.signature ); - assert_eq!(func.dfg.num_ebbs(), 0, "Function must be empty"); - assert_eq!(func.dfg.num_insts(), 0, "Function must be empty"); + debug_assert_eq!(func.dfg.num_ebbs(), 0, "Function must be empty"); + debug_assert_eq!(func.dfg.num_insts(), 0, "Function must be empty"); // This clears the `ILBuilder`. 
let mut builder = FunctionBuilder::new(func, &mut self.il_builder); @@ -107,7 +106,7 @@ impl FuncTranslator { /// Declare local variables for the signature parameters that correspond to WebAssembly locals. /// /// Return the number of local variables declared. -fn declare_wasm_parameters(builder: &mut FunctionBuilder, entry_block: Ebb) -> usize { +fn declare_wasm_parameters(builder: &mut FunctionBuilder, entry_block: Ebb) -> usize { let sig_len = builder.func.signature.params.len(); let mut next_local = 0; for i in 0..sig_len { @@ -116,7 +115,7 @@ fn declare_wasm_parameters(builder: &mut FunctionBuilder, entry_block: Eb // signature parameters. For example, a `vmctx` pointer. if param_type.purpose == ir::ArgumentPurpose::Normal { // This is a normal WebAssembly signature parameter, so create a local for it. - let local = Local::new(next_local); + let local = Variable::new(next_local); builder.declare_var(local, param_type.value_type); next_local += 1; @@ -133,7 +132,7 @@ fn declare_wasm_parameters(builder: &mut FunctionBuilder, entry_block: Eb /// Declare local variables, starting from `num_params`. fn parse_local_decls( reader: &mut BinaryReader, - builder: &mut FunctionBuilder, + builder: &mut FunctionBuilder, num_params: usize, ) -> CtonResult { let mut next_local = num_params; @@ -157,7 +156,7 @@ fn parse_local_decls( /// /// Fail of too many locals are declared in the function, or if the type is not valid for a local. fn declare_locals( - builder: &mut FunctionBuilder, + builder: &mut FunctionBuilder, count: u32, wasm_type: wasmparser::Type, next_local: &mut usize, @@ -174,7 +173,7 @@ fn declare_locals( let ty = builder.func.dfg.value_type(zeroval); for _ in 0..count { - let local = Local::new(*next_local); + let local = Variable::new(*next_local); builder.declare_var(local, ty); builder.def_var(local, zeroval); *next_local += 1; @@ -187,18 +186,18 @@ fn declare_locals( /// arguments and locals are declared in the builder. 
fn parse_function_body( mut reader: BinaryReader, - builder: &mut FunctionBuilder, + builder: &mut FunctionBuilder, state: &mut TranslationState, environ: &mut FE, ) -> CtonResult { // The control stack is initialized with a single block representing the whole function. - assert_eq!(state.control_stack.len(), 1, "State not initialized"); + debug_assert_eq!(state.control_stack.len(), 1, "State not initialized"); // Keep going until the final `End` operator which pops the outermost block. while !state.control_stack.is_empty() { builder.set_srcloc(cur_srcloc(&reader)); let op = reader.read_operator().map_err(|_| CtonError::InvalidInput)?; - translate_operator(&op, builder, state, environ); + translate_operator(op, builder, state, environ); } // The final `End` operator left us in the exit block where we need to manually add a return @@ -206,9 +205,11 @@ fn parse_function_body( // // If the exit block is unreachable, it may not have the correct arguments, so we would // generate a return instruction that doesn't match the signature. - debug_assert!(builder.is_pristine()); - if !builder.is_unreachable() { - builder.ins().return_(&state.stack); + if state.reachable { + debug_assert!(builder.is_pristine()); + if !builder.is_unreachable() { + builder.ins().return_(&state.stack); + } } // Discard any remaining values on the stack. Either we just returned them, diff --git a/lib/wasm/src/lib.rs b/lib/wasm/src/lib.rs index ae5c99e05e..4dc802d50f 100644 --- a/lib/wasm/src/lib.rs +++ b/lib/wasm/src/lib.rs @@ -9,7 +9,9 @@ //! //! The main function of this module is [`translate_module`](fn.translate_module.html). 
-#![deny(missing_docs)] +#![deny(missing_docs, + trivial_numeric_casts, + unused_extern_crates)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(not(feature = "std"), feature(alloc))] diff --git a/lib/wasm/src/module_translator.rs b/lib/wasm/src/module_translator.rs index 43a47b68af..bb74db4276 100644 --- a/lib/wasm/src/module_translator.rs +++ b/lib/wasm/src/module_translator.rs @@ -1,4 +1,4 @@ -//! Translation skeletton that traverses the whole WebAssembly module and call helper functions +//! Translation skeleton that traverses the whole WebAssembly module and call helper functions //! to deal with each part of it. use cretonne::timing; use wasmparser::{ParserState, SectionCode, ParserInput, Parser, WasmDecoder, BinaryReaderError}; @@ -126,6 +126,10 @@ pub fn translate_module<'data>( } } } + ParserState::BeginSection { code: SectionCode::Custom { .. }, .. } => { + // Ignore unknown custom sections. + next_input = ParserInput::SkipSection; + } _ => return Err(String::from("wrong content in the preamble")), }; } diff --git a/lib/wasm/src/sections_translator.rs b/lib/wasm/src/sections_translator.rs index a12a92e100..1bf7c9441c 100644 --- a/lib/wasm/src/sections_translator.rs +++ b/lib/wasm/src/sections_translator.rs @@ -117,7 +117,7 @@ pub fn parse_import_section<'data>( Ok(()) } -/// Retrieves the correspondances between functions and signatures from the function section +/// Retrieves the correspondences between functions and signatures from the function section pub fn parse_function_section( parser: &mut Parser, environ: &mut ModuleEnvironment, diff --git a/lib/wasm/src/state.rs b/lib/wasm/src/state.rs index 40de47d7c6..5ced6d6056 100644 --- a/lib/wasm/src/state.rs +++ b/lib/wasm/src/state.rs @@ -26,20 +26,20 @@ pub enum ControlStackFrame { branch_inst: Inst, num_return_values: usize, original_stack_size: usize, - reachable: bool, + exit_is_branched_to: bool, + reachable_from_top: bool, }, Block { destination: Ebb, num_return_values: usize, 
original_stack_size: usize, - reachable: bool, + exit_is_branched_to: bool, }, Loop { destination: Ebb, header: Ebb, num_return_values: usize, original_stack_size: usize, - reachable: bool, }, } @@ -81,19 +81,21 @@ impl ControlStackFrame { } } - pub fn is_reachable(&self) -> bool { + pub fn exit_is_branched_to(&self) -> bool { match *self { - ControlStackFrame::If { reachable, .. } | - ControlStackFrame::Block { reachable, .. } | - ControlStackFrame::Loop { reachable, .. } => reachable, + ControlStackFrame::If { exit_is_branched_to, .. } | + ControlStackFrame::Block { exit_is_branched_to, .. } => exit_is_branched_to, + ControlStackFrame::Loop { .. } => false, } } - pub fn set_reachable(&mut self) { + pub fn set_branched_to_exit(&mut self) { match *self { - ControlStackFrame::If { ref mut reachable, .. } | - ControlStackFrame::Block { ref mut reachable, .. } | - ControlStackFrame::Loop { ref mut reachable, .. } => *reachable = true, + ControlStackFrame::If { ref mut exit_is_branched_to, .. } | + ControlStackFrame::Block { ref mut exit_is_branched_to, .. } => { + *exit_is_branched_to = true + } + ControlStackFrame::Loop { .. } => {} } } } @@ -106,8 +108,7 @@ impl ControlStackFrame { pub struct TranslationState { pub stack: Vec, pub control_stack: Vec, - pub phantom_unreachable_stack_depth: usize, - pub real_unreachable_stack_depth: usize, + pub reachable: bool, // Map of global variables that have already been created by `FuncEnvironment::make_global`. 
globals: HashMap, @@ -131,8 +132,7 @@ impl TranslationState { Self { stack: Vec::new(), control_stack: Vec::new(), - phantom_unreachable_stack_depth: 0, - real_unreachable_stack_depth: 0, + reachable: true, globals: HashMap::new(), heaps: HashMap::new(), signatures: HashMap::new(), @@ -143,8 +143,7 @@ impl TranslationState { fn clear(&mut self) { debug_assert!(self.stack.is_empty()); debug_assert!(self.control_stack.is_empty()); - debug_assert_eq!(self.phantom_unreachable_stack_depth, 0); - debug_assert_eq!(self.real_unreachable_stack_depth, 0); + self.reachable = true; self.globals.clear(); self.heaps.clear(); self.signatures.clear(); @@ -220,7 +219,7 @@ impl TranslationState { destination: following_code, original_stack_size: self.stack.len(), num_return_values: num_result_types, - reachable: false, + exit_is_branched_to: false, }); } @@ -231,7 +230,6 @@ impl TranslationState { destination: following_code, original_stack_size: self.stack.len(), num_return_values: num_result_types, - reachable: false, }); } @@ -242,19 +240,10 @@ impl TranslationState { destination: following_code, original_stack_size: self.stack.len(), num_return_values: num_result_types, - reachable: false, + exit_is_branched_to: false, + reachable_from_top: self.reachable, }); } - - /// Test if the translation state is currently in unreachable code. - pub fn in_unreachable_code(&self) -> bool { - if self.real_unreachable_stack_depth > 0 { - true - } else { - debug_assert_eq!(self.phantom_unreachable_stack_depth, 0, "in reachable code"); - false - } - } } /// Methods for handling entity references. diff --git a/lib/wasm/src/translation_utils.rs b/lib/wasm/src/translation_utils.rs index 4c70d708ed..44c03fb6dd 100644 --- a/lib/wasm/src/translation_utils.rs +++ b/lib/wasm/src/translation_utils.rs @@ -71,25 +71,6 @@ pub struct Memory { pub shared: bool, } -/// Wrapper to a `get_local` and `set_local` index. They are WebAssembly's non-SSA variables. 
-#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] -pub struct Local(pub u32); -impl cretonne::entity::EntityRef for Local { - fn new(index: usize) -> Self { - debug_assert!(index < (u32::MAX as usize)); - Local(index as u32) - } - - fn index(self) -> usize { - self.0 as usize - } -} -impl Default for Local { - fn default() -> Self { - Local(u32::MAX) - } -} - /// Helper function translating wasmparser types to Cretonne types when possible. pub fn type_to_type(ty: &wasmparser::Type) -> Result { match *ty { diff --git a/rustc.rst b/rustc.rst index 90ab7df8b4..20cebeb5d6 100644 --- a/rustc.rst +++ b/rustc.rst @@ -2,15 +2,63 @@ Cretonne in Rustc ================= -The Rust compiler currently uses LLVM as its optimizer and code generator for both debug and -release builds. The Cretonne project does not intend to compete with LLVM when it comes to -optimizing release builds, but for debug builds where compilation speed is paramount, it makes -sense to use Cretonne instead of LLVM. +One goal for Cretonne is to be usable as a backend suitable for compiling Rust +in debug mode. This mode doesn't require a lot of mid-level optimization, and it +does want very fast compile times, and this matches up fairly well with what we +expect Cretonne's initial strengths and weaknesses will be. Cretonne is being +designed to take aggressive advantage of multiple cores, and to be very efficient +with its use of memory. -- Cretonne is designed to take advantage of multi-core CPUs, making parallel code generation quite - easy. This is harder with LLVM which was designed before multi-core CPUs where mainstream. -- Cretonne is designed with compilation speed in mind. It makes engineering tradeoffs that favor - compilation speed over advanced optimizations. +Another goal is a "pretty good" backend. 
The idea here is to do the work to get +MIR-level inlining enabled, do some basic optimizations in Cretonne to capture the +low-hanging fruit, and then use that along with good low-level optimizations to +produce code which has a chance of being decently fast, with quite fast compile +times. It obviously wouldn't compete with LLVM-based release builds in terms of +optimization, but for some users, completely unoptimized code is too slow to test +with, so a "pretty good" mode might be good enough. -See `the discussion on the Rust internals forum -`_. +There's plenty of work to do to achieve these goals, and if we achieve them, we'll have +enabled a Rust compiler written entirely in Rust, and enabled faster Rust compile +times for important use cases. + +With all that said, there is a potential goal beyond that, which is to build a +full optimizing release-capable backend. We can't predict how far Cretonne will go +yet, but we do have some crazy ideas about what such a thing might look like, +including: + +- Take advantage of Rust language properties in the optimizer. With LLVM, Rust is + able to use annotations to describe some of its aliasing guarantees, however the + annotations are awkward and limited. An optimizer that can represent the core + aliasing relationships that Rust provides directly has the potential to be very + powerful without the need for complex alias analysis logic. Unsafe blocks are an + interesting challenge, however in many simple cases, like Vec, it may be possible + to recover what the optimizer needs to know. + +- Design for superoptimization. Traditionally, compiler development teams have + spent many years of manual effort to identify patterns of code that can be + matched and replaced. Superoptimizers have been contributing some to this + effort, but in the future, we may be able to reverse roles. + Superoptimizers will do the bulk of the work, and humans will contribute + specialized optimizations that superoptimizers miss.
This has the potential to + take a new optimizer from scratch to diminishing-returns territory with much + less manual effort. + +- Build an optimizer IR without the constraints of fast-debug-build compilation. + Cretonne's base IR is focused on Codegen, so a full-strength optimizer would either + use an IR layer on top of it (possibly using Cretonne's flexible EntityMap system), + or possibly an independent IR that could be translated to/from the base IR. Either + way, this overall architecture would keep the optimizer out of the way of the + non-optimizing build path, which keeps that path fast and simple, and gives the + optimizer more flexibility. If we then want to base the IR on a powerful data + structure like the Value State Dependence Graph (VSDG), we can do so with fewer + compromises. + +And, these ideas build on each other. For example, one of the challenges for +dependence-graph-oriented IRs like the VSDG is getting good enough memory dependence +information. But if we can get high-quality aliasing information directly from the +Rust front-end, we should be in great shape. As another example, it's often harder +for superoptimizers to reason about control flow than expression graphs. But, +graph-oriented IRs like the VSDG represent control flow as control dependencies. +It's difficult to say how powerful this combination will be until we try it, but +if nothing else, it should be very convenient to express pattern-matching over a +single graph that includes both data and control dependencies.