Merge remote-tracking branch 'origin/master' into no_std

2018-03-12 12:55:57 -07:00
parent 5ffdc51742 11eddafef8
commit 4a3077d638
138 changed files with 3795 additions and 1168 deletions
--- a/README.rst
+++ b/README.rst
@@ -3,7 +3,7 @@ Cretonne Code Generator
 =======================

 Cretonne is a low-level retargetable code generator. It translates a `target-independent
-intermediate language <http://cretonne.readthedocs.io/en/latest/langref.html>`_ into executable
+intermediate language <https://cretonne.readthedocs.io/en/latest/langref.html>`_ into executable
 machine code.

 *This is a work in progress that is not yet functional.*
@@ -100,7 +100,7 @@ Building the documentation
 --------------------------

 To build the Cretonne documentation, you need the `Sphinx documentation
-generator <http://www.sphinx-doc.org/>`_::
+generator <https://www.sphinx-doc.org/>`_::

    $ pip install sphinx sphinx-autobuild sphinx_rtd_theme
    $ cd cretonne/docs
--- a/check-rustfmt.sh
+++ b/check-rustfmt.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
-#
+set -euo pipefail
+
 # Usage: check-rustfmt.sh [--install]
 #
 # Check that the desired version of rustfmt is installed.
--- a/cranelift/Cargo.toml
+++ b/cranelift/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "cretonne-tools"
 authors = ["The Cretonne Project Developers"]
-version = "0.1.0"
+version = "0.3.4"
 description = "Binaries for testing the Cretonne library"
 license = "Apache-2.0"
 documentation = "https://cretonne.readthedocs.io/"
@@ -13,18 +13,18 @@ name = "cton-util"
 path = "src/cton-util.rs"

 [dependencies]
-cretonne = { path = "lib/cretonne", version = "0.1.0" }
-cretonne-reader = { path = "lib/reader", version = "0.1.0" }
-cretonne-frontend = { path = "lib/frontend", version = "0.1.0" }
-cretonne-wasm = { path = "lib/wasm", version = "0.1.0" }
-cretonne-native = { path = "lib/native", version = "0.1.0" }
+cretonne = { path = "lib/cretonne", version = "0.3.4" }
+cretonne-reader = { path = "lib/reader", version = "0.3.4" }
+cretonne-frontend = { path = "lib/frontend", version = "0.3.4" }
+cretonne-wasm = { path = "lib/wasm", version = "0.3.4" }
+cretonne-native = { path = "lib/native", version = "0.3.4" }
 filecheck = { path = "lib/filecheck" }
 docopt = "0.8.0"
 serde = "1.0.8"
 serde_derive = "1.0.8"
 num_cpus = "1.5.1"
 tempdir="0.3.5"
-term = "0.4.6"
+term = "0.5"

 [workspace]

--- a/cranelift/docs/Makefile
+++ b/cranelift/docs/Makefile
@@ -1,196 +1,24 @@
-# Makefile for Sphinx documentation
+# Minimal makefile for Sphinx documentation
 #

 # You can set these variables from the command line.
 SPHINXOPTS    =
 SPHINXBUILD   = sphinx-build
 SPHINXABUILD  = sphinx-autobuild
-PAPER         =
+SPHINXPROJ    = cretonne
+SOURCEDIR     = .
 BUILDDIR      = _build

-# User-friendly check for sphinx-build
-ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
-$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
-endif
-
-# Internal variables.
-PAPEROPT_a4     = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext
-
+# Put it first so that "make" without argument is like "make help".
 help:
-	@echo "Please use \`make <target>' where <target> is one of"
-	@echo "  html       to make standalone HTML files"
-	@echo "  dirhtml    to make HTML files named index.html in directories"
-	@echo "  singlehtml to make a single large HTML file"
-	@echo "  pickle     to make pickle files"
-	@echo "  json       to make JSON files"
-	@echo "  htmlhelp   to make HTML files and a HTML help project"
-	@echo "  qthelp     to make HTML files and a qthelp project"
-	@echo "  applehelp  to make an Apple Help Book"
-	@echo "  devhelp    to make HTML files and a Devhelp project"
-	@echo "  epub       to make an epub"
-	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
-	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
-	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
-	@echo "  text       to make text files"
-	@echo "  man        to make manual pages"
-	@echo "  texinfo    to make Texinfo files"
-	@echo "  info       to make Texinfo files and run them through makeinfo"
-	@echo "  gettext    to make PO message catalogs"
-	@echo "  changes    to make an overview of all changed/added/deprecated items"
-	@echo "  xml        to make Docutils-native XML files"
-	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
-	@echo "  linkcheck  to check all external links for integrity"
-	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
-	@echo "  coverage   to run coverage check of the documentation (if enabled)"
-
-clean:
-	rm -rf $(BUILDDIR)/*
-
-html:
-	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

 autohtml: html
-	$(SPHINXABUILD) -z ../lib/cretonne/meta --ignore '.*' -b html -E $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	$(SPHINXABUILD) -z ../lib/cretonne/meta --ignore '.*' -b html -E -d "$(BUILDDIR)/doctrees" $(SPHINXOPTS) "$(SOURCEDIR)" "$(BUILDDIR)/html"

-dirhtml:
-	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+.PHONY: help autohtml Makefile

-singlehtml:
-	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
-	@echo
-	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-pickle:
-	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
-	@echo
-	@echo "Build finished; now you can process the pickle files."
-
-json:
-	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
-	@echo
-	@echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
-	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
-	@echo
-	@echo "Build finished; now you can run HTML Help Workshop with the" \
-	      ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
-	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
-	@echo
-	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
-	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
-	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/cretonne.qhcp"
-	@echo "To view the help file:"
-	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/cretonne.qhc"
-
-applehelp:
-	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
-	@echo
-	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
-	@echo "N.B. You won't be able to view it unless you put it in" \
-	      "~/Library/Documentation/Help or install it in your application" \
-	      "bundle."
-
-devhelp:
-	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
-	@echo
-	@echo "Build finished."
-	@echo "To view the help file:"
-	@echo "# mkdir -p $$HOME/.local/share/devhelp/cretonne"
-	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/cretonne"
-	@echo "# devhelp"
-
-epub:
-	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
-	@echo
-	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-latex:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo
-	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
-	@echo "Run \`make' in that directory to run these through (pdf)latex" \
-	      "(use \`make latexpdf' here to do that automatically)."
-
-latexpdf:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo "Running LaTeX files through pdflatex..."
-	$(MAKE) -C $(BUILDDIR)/latex all-pdf
-	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-latexpdfja:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo "Running LaTeX files through platex and dvipdfmx..."
-	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
-	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-text:
-	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
-	@echo
-	@echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-man:
-	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
-	@echo
-	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-texinfo:
-	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
-	@echo
-	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
-	@echo "Run \`make' in that directory to run these through makeinfo" \
-	      "(use \`make info' here to do that automatically)."
-
-info:
-	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
-	@echo "Running Texinfo files through makeinfo..."
-	make -C $(BUILDDIR)/texinfo info
-	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-gettext:
-	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
-	@echo
-	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-changes:
-	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
-	@echo
-	@echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
-	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
-	@echo
-	@echo "Link check complete; look for any errors in the above output " \
-	      "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
-	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
-	@echo "Testing of doctests in the sources finished, look at the " \
-	      "results in $(BUILDDIR)/doctest/output.txt."
-
-coverage:
-	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
-	@echo "Testing of coverage in the sources finished, look at the " \
-	      "results in $(BUILDDIR)/coverage/python.txt."
-
-xml:
-	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
-	@echo
-	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
-
-pseudoxml:
-	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
-	@echo
-	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/cranelift/docs/compare-llvm.rst
+++ b/cranelift/docs/compare-llvm.rst
@@ -2,9 +2,9 @@
 Cretonne compared to LLVM
 *************************

-`LLVM <http://llvm.org>`_ is a collection of compiler components implemented as
+`LLVM <https://llvm.org>`_ is a collection of compiler components implemented as
 a set of C++ libraries. It can be used to build both JIT compilers and static
-compilers like `Clang <http://clang.llvm.org>`_, and it is deservedly very
+compilers like `Clang <https://clang.llvm.org>`_, and it is deservedly very
 popular. `Chris Lattner's chapter about LLVM
 <http://www.aosabook.org/en/llvm.html>`_ in the `Architecture of Open Source
 Applications <http://aosabook.org/en/index.html>`_ book gives an excellent
@@ -40,7 +40,7 @@ Intermediate representations
 LLVM uses multiple intermediate representations as it translates a program to
 binary machine code:

-`LLVM IR <http://llvm.org/docs/LangRef.html>`_
+`LLVM IR <https://llvm.org/docs/LangRef.html>`_
    This is the primary intermediate language which has textual, binary, and
    in-memory representations. It serves two main purposes:

@@ -49,7 +49,7 @@ binary machine code:
    - Intermediate representation for common mid-level optimizations. A large
      library of code analysis and transformation passes operate on LLVM IR.

-`SelectionDAG <http://llvm.org/docs/CodeGenerator.html#instruction-selection-section>`_
+`SelectionDAG <https://llvm.org/docs/CodeGenerator.html#instruction-selection-section>`_
    A graph-based representation of the code in a single basic block is used by
    the instruction selector. It has both ISA-agnostic and ISA-specific
    opcodes. These main passes are run on the SelectionDAG representation:
@@ -65,7 +65,7 @@ binary machine code:
    The SelectionDAG representation automatically eliminates common
    subexpressions and dead code.

-`MachineInstr <http://llvm.org/docs/CodeGenerator.html#machine-code-representation>`_
+`MachineInstr <https://llvm.org/docs/CodeGenerator.html#machine-code-representation>`_
    A linear representation of ISA-specific instructions that initially is in
    SSA form, but it can also represent non-SSA form during and after register
    allocation. Many low-level optimizations run on MI code. The most important
@@ -74,7 +74,7 @@ binary machine code:
    - Scheduling.
    - Register allocation.

-`MC <http://llvm.org/docs/CodeGenerator.html#the-mc-layer>`_
+`MC <https://llvm.org/docs/CodeGenerator.html#the-mc-layer>`_
    MC serves as the output abstraction layer and is the basis for LLVM's
    integrated assembler. It is used for:

@@ -126,7 +126,7 @@ condition is false. The Cretonne representation is closer to how machine code
 works; LLVM's representation is more abstract.

 LLVM uses `phi instructions
-<http://llvm.org/docs/LangRef.html#phi-instruction>`_ in its SSA
+<https://llvm.org/docs/LangRef.html#phi-instruction>`_ in its SSA
 representation. Cretonne passes arguments to EBBs instead. The two
 representations are equivalent, but the EBB arguments are better suited to
 handle EBBs that may contain multiple branches to the same destination block
--- a/cranelift/docs/conf.py
+++ b/cranelift/docs/conf.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # cretonne documentation build configuration file, created by
-# sphinx-quickstart on Fri Jan  8 10:11:19 2016.
+# sphinx-quickstart on Fri Mar  2 12:49:24 2018.
 #
 # This file is execfile()d with the current directory set to its
 # containing dir.
@@ -12,14 +12,13 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.

-from __future__ import absolute_import
-import sys
-import os
-
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
+
+from __future__ import absolute_import
+import os
+import sys
 sys.path.insert(0, os.path.abspath('.'))

 # Also add the meta directory to sys.path so autodoc can find the Cretonne meta
@@ -28,6 +27,10 @@ sys.path.insert(0, os.path.abspath('../lib/cretonne/meta'))

 # -- General configuration ------------------------------------------------

+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
@@ -47,6 +50,7 @@ templates_path = ['_templates']

 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
+#
 # source_suffix = ['.rst', '.md']
 source_suffix = '.rst'

@@ -55,7 +59,7 @@ master_doc = 'index'

 # General information about the project.
 project = u'cretonne'
-copyright = u'2016, Cretonne Developers'
+copyright = u'2018, Cretonne Developers'
 author = u'Cretonne Developers'

 # The version info for the project you're documenting, acts as replacement for
@@ -76,7 +80,8 @@ language = None

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+# These patterns also affect html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
@@ -89,14 +94,46 @@ todo_include_todos = True

 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
+#
 html_theme = 'sphinx_rtd_theme'

+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+#
+# html_static_path = ['_static']
+
+
+# -- Options for HTMLHelp output ------------------------------------------
+
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'cretonnedoc'

+
 # -- Options for LaTeX output ---------------------------------------------

 latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
 }

 # Grouping the document tree into LaTeX files. List of tuples
--- a/cranelift/docs/example.cton
+++ b/cranelift/docs/example.cton
@@ -1,7 +1,7 @@
 test verifier

 function %average(i32, i32) -> f32 native {
-    ss1 = local 8            ; Stack slot for ``sum``.
+    ss1 = explicit_slot 8         ; Stack slot for ``sum``.

 ebb1(v1: i32, v2: i32):
    v3 = f64const 0x0.0
--- a/cranelift/docs/index.rst
+++ b/cranelift/docs/index.rst
@@ -12,6 +12,28 @@ Contents:
   regalloc
   compare-llvm

+Rust Crate Documentation
+========================
+
+`cretonne <https://docs.rs/cretonne/>`_
+    This is the core code generator crate. It takes Cretonne IR as input
+    and emits encoded machine instructions, along with symbolic relocations,
+    as output.
+
+`cretonne-wasm <https://docs.rs/cretonne-wasm/>`_
+    This crate translates WebAssembly code into Cretonne IR.
+
+`cretonne-frontend <https://docs.rs/cretonne-frontend/>`_
+    This crate provides utilities for translating code into Cretonne IR.
+
+`cretonne-native <https://docs.rs/cretonne-native/>`_
+    This crate performs auto-detection of the host, allowing Cretonne to
+    generate code optimized for the machine it's running on.
+
+`cretonne-reader <https://docs.rs/cretonne-reader/>`_
+    This crate translates Cretonne IR's text format into in-memory
+    Cretonne IR data structures.
+
 Indices and tables
 ==================

--- a/cranelift/docs/langref.rst
+++ b/cranelift/docs/langref.rst
@@ -37,7 +37,7 @@ The first line of a function definition provides the function *name* and
 the :term:`function signature` which declares the parameter and return types.
 Then follows the :term:`function preamble` which declares a number of entities
 that can be referenced inside the function. In the example above, the preamble
-declares a single local variable, ``ss1``.
+declares a single explicit stack slot, ``ss1``.

 After the preamble follows the :term:`function body` which consists of
 :term:`extended basic block`\s (EBBs), the first of which is the
@@ -440,7 +440,7 @@ Cretonne provides fully general :inst:`load` and :inst:`store` instructions for
 accessing memory, as well as :ref:`extending loads and truncating stores
 <extload-truncstore>`.

-If the memory at the given addresss is not :term:`addressable`, the behavior of
+If the memory at the given address is not :term:`addressable`, the behavior of
 these instructions is undefined. If it is addressable but not
 :term:`accessible`, they :term:`trap`.

@@ -471,8 +471,8 @@ the expected alignment. By default, misaligned loads and stores are allowed,
 but when the ``aligned`` flag is set, a misaligned memory access is allowed to
 :term:`trap`.

-Local variables
---------------
+Explicit Stack Slots
+--------------------

 One set of restricted memory operations access the current function's stack
 frame. The stack frame is divided into fixed-size stack slots that are
@@ -480,9 +480,9 @@ allocated in the :term:`function preamble`. Stack slots are not typed, they
 simply represent a contiguous sequence of :term:`accessible` bytes in the stack
 frame.

-.. inst:: SS = local Bytes, Flags...
+.. inst:: SS = explicit_slot Bytes, Flags...

-    Allocate a stack slot for a local variable in the preamble.
+    Allocate a stack slot in the preamble.

    If no alignment is specified, Cretonne will pick an appropriate alignment
    for the stack slot based on its size and access patterns.
@@ -559,7 +559,7 @@ runtime data structures.
    The address of GV can be computed by first loading a pointer from BaseGV
    and adding Offset to it.

-    It is assumed the BaseGV resides in readable memory with the apropriate
+    It is assumed the BaseGV resides in readable memory with the appropriate
    alignment for storing a pointer.

    Chains of ``deref`` global variables are possible, but cycles are not
@@ -782,7 +782,7 @@ Integer operations

    For example, see
    `llvm.sadd.with.overflow.*` and `llvm.ssub.with.overflow.*` in
-    `LLVM <http://llvm.org/docs/LangRef.html#arithmetic-with-overflow-intrinsics>`_.
+    `LLVM <https://llvm.org/docs/LangRef.html#arithmetic-with-overflow-intrinsics>`_.

 .. autoinst:: imul
 .. autoinst:: imul_imm
@@ -1135,7 +1135,7 @@ Glossary
        A list of declarations of entities that are used by the function body.
        Some of the entities that can be declared in the preamble are:

-        - Local variables.
+        - Stack slots.
        - Functions that are called directly.
        - Function signatures for indirect function calls.
        - Function flags and attributes that are not part of the signature.
@@ -1160,7 +1160,19 @@ Glossary

    stack slot
        A fixed size memory allocation in the current function's activation
-        frame. Also called a local variable.
+        frame. These include :term:`explicit stack slot`\s and
+        :term:`spill stack slot`\s.
+
+    explicit stack slot
+        A fixed size memory allocation in the current function's activation
+        frame. These differ from :term:`spill stack slot`\s in that they can
+        be created by frontends and they may have their addresses taken.
+
+    spill stack slot
+        A fixed size memory allocation in the current function's activation
+        frame. These differ from :term:`explicit stack slot`\s in that they are
+        only created during register allocation, and they may not have their
+        address taken.

    terminator instruction
        A control flow instruction that unconditionally directs the flow of
--- a/cranelift/docs/make.bat
+++ b/cranelift/docs/make.bat
@@ -1,62 +1,19 @@
@ECHO OFF

+pushd %~dp0
+
 REM Command file for Sphinx documentation

 if "%SPHINXBUILD%" == "" (
 	set SPHINXBUILD=sphinx-build
 )
+set SOURCEDIR=.
 set BUILDDIR=_build
-set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
-set I18NSPHINXOPTS=%SPHINXOPTS% .
-if NOT "%PAPER%" == "" (
-	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
-	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
-)
+set SPHINXPROJ=cretonne

 if "%1" == "" goto help

-if "%1" == "help" (
-	:help
-	echo.Please use `make ^<target^>` where ^<target^> is one of
-	echo.  html       to make standalone HTML files
-	echo.  dirhtml    to make HTML files named index.html in directories
-	echo.  singlehtml to make a single large HTML file
-	echo.  pickle     to make pickle files
-	echo.  json       to make JSON files
-	echo.  htmlhelp   to make HTML files and a HTML help project
-	echo.  qthelp     to make HTML files and a qthelp project
-	echo.  devhelp    to make HTML files and a Devhelp project
-	echo.  epub       to make an epub
-	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
-	echo.  text       to make text files
-	echo.  man        to make manual pages
-	echo.  texinfo    to make Texinfo files
-	echo.  gettext    to make PO message catalogs
-	echo.  changes    to make an overview over all changed/added/deprecated items
-	echo.  xml        to make Docutils-native XML files
-	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
-	echo.  linkcheck  to check all external links for integrity
-	echo.  doctest    to run all doctests embedded in the documentation if enabled
-	echo.  coverage   to run coverage check of the documentation if enabled
-	goto end
-)
-
-if "%1" == "clean" (
-	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
-	del /q /s %BUILDDIR%\*
-	goto end
-)
-
-
-REM Check if sphinx-build is available and fallback to Python version if any
-%SPHINXBUILD% 1>NUL 2>NUL
-if errorlevel 9009 goto sphinx_python
-goto sphinx_ok
-
-:sphinx_python
-
-set SPHINXBUILD=python -m sphinx.__init__
-%SPHINXBUILD% 2> nul
+%SPHINXBUILD% >NUL 2>NUL
 if errorlevel 9009 (
 	echo.
 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
@@ -65,199 +22,15 @@ if errorlevel 9009 (
 	echo.may add the Sphinx directory to PATH.
 	echo.
 	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
+	echo.https://sphinx-doc.org/
 	exit /b 1
 )

-:sphinx_ok
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end

-
-if "%1" == "html" (
-	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
-	goto end
-)
-
-if "%1" == "dirhtml" (
-	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
-	goto end
-)
-
-if "%1" == "singlehtml" (
-	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
-	goto end
-)
-
-if "%1" == "pickle" (
-	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can process the pickle files.
-	goto end
-)
-
-if "%1" == "json" (
-	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can process the JSON files.
-	goto end
-)
-
-if "%1" == "htmlhelp" (
-	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can run HTML Help Workshop with the ^
-.hhp project file in %BUILDDIR%/htmlhelp.
-	goto end
-)
-
-if "%1" == "qthelp" (
-	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can run "qcollectiongenerator" with the ^
-.qhcp project file in %BUILDDIR%/qthelp, like this:
-	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\cretonne.qhcp
-	echo.To view the help file:
-	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\cretonne.ghc
-	goto end
-)
-
-if "%1" == "devhelp" (
-	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished.
-	goto end
-)
-
-if "%1" == "epub" (
-	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The epub file is in %BUILDDIR%/epub.
-	goto end
-)
-
-if "%1" == "latex" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "latexpdf" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	cd %BUILDDIR%/latex
-	make all-pdf
-	cd %~dp0
-	echo.
-	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "latexpdfja" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	cd %BUILDDIR%/latex
-	make all-pdf-ja
-	cd %~dp0
-	echo.
-	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "text" (
-	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The text files are in %BUILDDIR%/text.
-	goto end
-)
-
-if "%1" == "man" (
-	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The manual pages are in %BUILDDIR%/man.
-	goto end
-)
-
-if "%1" == "texinfo" (
-	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
-	goto end
-)
-
-if "%1" == "gettext" (
-	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
-	goto end
-)
-
-if "%1" == "changes" (
-	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.The overview file is in %BUILDDIR%/changes.
-	goto end
-)
-
-if "%1" == "linkcheck" (
-	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Link check complete; look for any errors in the above output ^
-or in %BUILDDIR%/linkcheck/output.txt.
-	goto end
-)
-
-if "%1" == "doctest" (
-	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Testing of doctests in the sources finished, look at the ^
-results in %BUILDDIR%/doctest/output.txt.
-	goto end
-)
-
-if "%1" == "coverage" (
-	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Testing of coverage in the sources finished, look at the ^
-results in %BUILDDIR%/coverage/python.txt.
-	goto end
-)
-
-if "%1" == "xml" (
-	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The XML files are in %BUILDDIR%/xml.
-	goto end
-)
-
-if "%1" == "pseudoxml" (
-	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
-	goto end
-)
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

 :end
+popd
--- a/cranelift/docs/testing.rst
+++ b/cranelift/docs/testing.rst
@@ -119,7 +119,7 @@ All types of tests allow shared Cretonne settings to be modified:
    option        : flag | setting "=" value

 The shared settings available for all target ISAs are defined in
-:file:`lib/cretonne/meta/cretonne/settings.py`.
+:file:`lib/cretonne/meta/base/settings.py`.

 The ``set`` lines apply settings cumulatively::

--- a/cranelift/filetests/isa/intel/binary32.cton
+++ b/cranelift/filetests/isa/intel/binary32.cton
@@ -552,3 +552,35 @@ ebb1:

    return
 }
+
+; Tests for i32/i8 conversion instructions.
+function %I32_I8() {
+ebb0:
+    [-,%rcx]            v1 = iconst.i32 1
+
+    [-,%rcx]            v11 = ireduce.i8 v1             ; bin:
+
+    ; asm: movsbl %cl, %esi
+    [-,%rsi]            v20 = sextend.i32 v11           ; bin: 0f be f1
+
+    ; asm: movzbl %cl, %esi
+    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b6 f1
+
+    trap user0                                          ; bin: 0f 0b
+}
+
+; Tests for i32/i16 conversion instructions.
+function %I32_I16() {
+ebb0:
+    [-,%rcx]            v1 = iconst.i32 1
+
+    [-,%rcx]            v11 = ireduce.i16 v1            ; bin:
+
+    ; asm: movswl %cx, %esi
+    [-,%rsi]            v20 = sextend.i32 v11           ; bin: 0f bf f1
+
+    ; asm: movzwl %cx, %esi
+    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b7 f1
+
+    trap user0                                          ; bin: 0f 0b
+}
--- a/cranelift/filetests/isa/intel/binary64-float.cton
+++ b/cranelift/filetests/isa/intel/binary64-float.cton
@@ -20,6 +20,7 @@ ebb0:
    [-,%rsi]            v1 = iconst.i32 2
    [-,%rax]            v2 = iconst.i64 11
    [-,%r14]            v3 = iconst.i64 12
+    [-,%r13]            v4 = iconst.i64 13

    ; asm: cvtsi2ssl %r11d, %xmm5
    [-,%xmm5]           v10 = fcvt_from_sint.f32 v0             ; bin: f3 41 0f 2a eb
@@ -173,6 +174,10 @@ ebb0:
    [-]                 store.f32 v100, v3                      ; bin: 66 41 0f 7e 2e
    ; asm: movd %xmm10, (%rax)
    [-]                 store.f32 v101, v2                      ; bin: 66 44 0f 7e 10
+    ; asm: movd %xmm5, (%r13)
+    [-]                 store.f32 v100, v4                      ; bin: 66 41 0f 7e 6d 00
+    ; asm: movd %xmm10, (%r13)
+    [-]                 store.f32 v101, v4                      ; bin: 66 45 0f 7e 55 00
    ; asm: movd %xmm5, 50(%r14)
    [-]                 store.f32 v100, v3+50                   ; bin: 66 41 0f 7e 6e 32
    ; asm: movd %xmm10, -50(%rax)
@@ -250,6 +255,7 @@ ebb0:
    [-,%rsi]            v1 = iconst.i32 2
    [-,%rax]            v2 = iconst.i64 11
    [-,%r14]            v3 = iconst.i64 12
+    [-,%r13]            v4 = iconst.i64 13

    ; asm: cvtsi2sdl %r11d, %xmm5
    [-,%xmm5]           v10 = fcvt_from_sint.f64 v0             ; bin: f2 41 0f 2a eb
@@ -403,6 +409,10 @@ ebb0:
    [-]                 store.f64 v100, v3                      ; bin: 66 41 0f d6 2e
    ; asm: movq %xmm10, (%rax)
    [-]                 store.f64 v101, v2                      ; bin: 66 44 0f d6 10
+    ; asm: movq %xmm5, (%r13)
+    [-]                 store.f64 v100, v4                      ; bin: 66 41 0f d6 6d 00
+    ; asm: movq %xmm10, (%r13)
+    [-]                 store.f64 v101, v4                      ; bin: 66 45 0f d6 55 00
    ; asm: movq %xmm5, 50(%r14)
    [-]                 store.f64 v100, v3+50                   ; bin: 66 41 0f d6 6e 32
    ; asm: movq %xmm10, -50(%rax)
--- a/cranelift/filetests/isa/intel/binary64.cton
+++ b/cranelift/filetests/isa/intel/binary64.cton
@@ -336,6 +336,28 @@ ebb0:
    ; asm: divq %r10
    [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3  ; bin: 49 f7 f2

+    ; double-length multiply instructions, 64 bit
+    [-,%rax]       v1001 = iconst.i64 1
+    [-,%r15]       v1002 = iconst.i64 2
+    ; asm: mulq %r15
+    [-,%rax,%rdx]  v1003, v1004 = x86_umulx v1001, v1002 ; bin: 49 f7 e7
+    ; asm: imulq %r15
+    [-,%rax,%rdx]  v1005, v1006 = x86_smulx v1001, v1002 ; bin: 49 f7 ef
+
+    ; double-length multiply instructions, 32 bit
+    [-,%rax]       v1011 = iconst.i32 1
+    [-,%r15]       v1012 = iconst.i32 2
+    [-,%rcx]       v1017 = iconst.i32 3
+    ; asm: mull %r15d
+    [-,%rax,%rdx]  v1013, v1014 = x86_umulx v1011, v1012    ; bin: 41 f7 e7
+    ; asm: imull %r15d
+    [-,%rax,%rdx]  v1015, v1016 = x86_smulx v1011, v1012    ; bin: 41 f7 ef
+
+    ; asm: mull %ecx
+    [-,%rax,%rdx]  v1018, v1019 = x86_umulx v1011, v1017    ; bin: f7 e1
+    ; asm: imull %ecx
+    [-,%rax,%rdx]  v1020, v1021 = x86_smulx v1011, v1017    ; bin: f7 e9
+
    ; Bit-counting instructions.

    ; asm: popcntq %rsi, %rcx
@@ -1062,6 +1084,118 @@ ebb2:
    jump ebb1                                   ; bin: eb fd
 }

+; Tests for i32/i8 conversion instructions.
+function %I32_I8() {
+ebb0:
+    [-,%rcx]            v1 = iconst.i32 1
+    [-,%rsi]            v2 = iconst.i32 2
+    [-,%r10]            v3 = iconst.i32 3
+
+    [-,%rcx]            v11 = ireduce.i8 v1             ; bin:
+    [-,%rsi]            v12 = ireduce.i8 v2             ; bin:
+    [-,%r10]            v13 = ireduce.i8 v3             ; bin:
+
+    ; asm: movsbl %cl, %esi
+    [-,%rsi]            v20 = sextend.i32 v11           ; bin: 0f be f1
+    ; asm: movsbl %sil, %r10d
+    [-,%r10]            v21 = sextend.i32 v12           ; bin: 44 0f be d6
+    ; asm: movsbl %r10b, %ecx
+    [-,%rcx]            v22 = sextend.i32 v13           ; bin: 41 0f be ca
+
+    ; asm: movzbl %cl, %esi
+    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b6 f1
+    ; asm: movzbl %sil, %r10d
+    [-,%r10]            v31 = uextend.i32 v12           ; bin: 44 0f b6 d6
+    ; asm: movzbl %r10b, %ecx
+    [-,%rcx]            v32 = uextend.i32 v13           ; bin: 41 0f b6 ca
+
+    trap user0                                          ; bin: 0f 0b
+}
+
+; Tests for i32/i16 conversion instructions.
+function %I32_I16() {
+ebb0:
+    [-,%rcx]            v1 = iconst.i32 1
+    [-,%rsi]            v2 = iconst.i32 2
+    [-,%r10]            v3 = iconst.i32 3
+
+    [-,%rcx]            v11 = ireduce.i16 v1            ; bin:
+    [-,%rsi]            v12 = ireduce.i16 v2            ; bin:
+    [-,%r10]            v13 = ireduce.i16 v3            ; bin:
+
+    ; asm: movswl %cx, %esi
+    [-,%rsi]            v20 = sextend.i32 v11           ; bin: 0f bf f1
+    ; asm: movswl %si, %r10d
+    [-,%r10]            v21 = sextend.i32 v12           ; bin: 44 0f bf d6
+    ; asm: movswl %r10w, %ecx
+    [-,%rcx]            v22 = sextend.i32 v13           ; bin: 41 0f bf ca
+
+    ; asm: movzwl %cx, %esi
+    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b7 f1
+    ; asm: movzwl %si, %r10d
+    [-,%r10]            v31 = uextend.i32 v12           ; bin: 44 0f b7 d6
+    ; asm: movzwl %r10w, %ecx
+    [-,%rcx]            v32 = uextend.i32 v13           ; bin: 41 0f b7 ca
+
+    trap user0                                          ; bin: 0f 0b
+}
+
+; Tests for i64/i8 conversion instructions.
+function %I64_I8() {
+ebb0:
+    [-,%rcx]            v1 = iconst.i64 1
+    [-,%rsi]            v2 = iconst.i64 2
+    [-,%r10]            v3 = iconst.i64 3
+
+    [-,%rcx]            v11 = ireduce.i8 v1             ; bin:
+    [-,%rsi]            v12 = ireduce.i8 v2             ; bin:
+    [-,%r10]            v13 = ireduce.i8 v3             ; bin:
+
+    ; asm: movsbq %cl, %rsi
+    [-,%rsi]            v20 = sextend.i64 v11           ; bin: 48 0f be f1
+    ; asm: movsbq %sil, %r10
+    [-,%r10]            v21 = sextend.i64 v12           ; bin: 4c 0f be d6
+    ; asm: movsbq %r10b, %rcx
+    [-,%rcx]            v22 = sextend.i64 v13           ; bin: 49 0f be ca
+
+    ; asm: movzbl %cl, %esi
+    [-,%rsi]            v30 = uextend.i64 v11           ; bin: 0f b6 f1
+    ; asm: movzbl %sil, %r10d
+    [-,%r10]            v31 = uextend.i64 v12           ; bin: 44 0f b6 d6
+    ; asm: movzbl %r10b, %ecx
+    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 41 0f b6 ca
+
+    trap user0                                          ; bin: 0f 0b
+}
+
+; Tests for i64/i16 conversion instructions.
+function %I64_I16() {
+ebb0:
+    [-,%rcx]            v1 = iconst.i64 1
+    [-,%rsi]            v2 = iconst.i64 2
+    [-,%r10]            v3 = iconst.i64 3
+
+    [-,%rcx]            v11 = ireduce.i16 v1            ; bin:
+    [-,%rsi]            v12 = ireduce.i16 v2            ; bin:
+    [-,%r10]            v13 = ireduce.i16 v3            ; bin:
+
+    ; asm: movswq %cx, %rsi
+    [-,%rsi]            v20 = sextend.i64 v11           ; bin: 48 0f bf f1
+    ; asm: movswq %si, %r10
+    [-,%r10]            v21 = sextend.i64 v12           ; bin: 4c 0f bf d6
+    ; asm: movswq %r10w, %rcx
+    [-,%rcx]            v22 = sextend.i64 v13           ; bin: 49 0f bf ca
+
+    ; asm: movzwl %cx, %esi
+    [-,%rsi]            v30 = uextend.i64 v11           ; bin: 0f b7 f1
+    ; asm: movzwl %si, %r10d
+    [-,%r10]            v31 = uextend.i64 v12           ; bin: 44 0f b7 d6
+    ; asm: movzwl %r10w, %ecx
+    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 41 0f b7 ca
+
+    trap user0                                          ; bin: 0f 0b
+}
+
 ; Tests for i64/i32 conversion instructions.
 function %I64_I32() {
 ebb0:
--- a/cranelift/filetests/isa/intel/legalize-mulhi.cton
+++ b/cranelift/filetests/isa/intel/legalize-mulhi.cton
@@ -0,0 +1,45 @@
+
+test compile
+set is_64bit
+isa intel baseline
+
+; umulhi/smulhi on 64 bit operands
+
+function %i64_umulhi(i64, i64) -> i64 {
+ebb0(v10: i64, v11: i64):
+  v12 = umulhi v10, v11
+  ; check: %rdi -> %rax
+  ; check: x86_umulx
+  ; check: %rdx -> %rax
+  return v12
+}
+
+function %i64_smulhi(i64, i64) -> i64 {
+ebb0(v20: i64, v21: i64):
+  v22 = smulhi v20, v21
+  ; check: %rdi -> %rax
+  ; check: x86_smulx
+  ; check: %rdx -> %rax
+  return v22
+}
+
+
+; umulhi/smulhi on 32 bit operands
+
+function %i32_umulhi(i32, i32) -> i32 {
+ebb0(v30: i32, v31: i32):
+  v32 = umulhi v30, v31
+  ; check: %rdi -> %rax
+  ; check: x86_umulx
+  ; check: %rdx -> %rax
+  return v32
+}
+
+function %i32_smulhi(i32, i32) -> i32 {
+ebb0(v40: i32, v41: i32):
+  v42 = smulhi v40, v41
+  ; check: %rdi -> %rax
+  ; check: x86_smulx
+  ; check: %rdx -> %rax
+  return v42
+}
--- a/cranelift/filetests/isa/intel/prologue-epilogue.cton
+++ b/cranelift/filetests/isa/intel/prologue-epilogue.cton
@@ -4,13 +4,13 @@ set is_compressed
 isa intel haswell

 function %foo() {
-    ss0 = local 168
+    ss0 = explicit_slot 168
 ebb0:
    return
 }

 ; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] native {
-; nextln:     ss0 = local 168, offset -224
+; nextln:     ss0 = explicit_slot 168, offset -224
 ; nextln:     ss1 = incoming_arg 56, offset -56
 ; check: ebb0(v0: i64 [%rbp], v1: i64 [%rbx], v2: i64 [%r12], v3: i64 [%r13], v4: i64 [%r14], v5: i64 [%r15]):
 ; nextln:     x86_push v0
--- a/cranelift/filetests/parser/tiny.cton
+++ b/cranelift/filetests/parser/tiny.cton
@@ -124,7 +124,7 @@ ebb0(v90: i32, v91: f32):
 ; Stack slot references
 function %stack() {
    ss10 = spill_slot 8
-    ss2 = local 4
+    ss2 = explicit_slot 4
    ss3 = incoming_arg 4, offset 8
    ss4 = outgoing_arg 4
    ss5 = emergency_slot 4
@@ -136,7 +136,7 @@ ebb0:
    stack_store v2, ss2
 }
 ; sameln: function %stack() native {
-; check:     ss2 = local 4
+; check:     ss2 = explicit_slot 4
 ; check:     ss3 = incoming_arg 4, offset 8
 ; check:     ss4 = outgoing_arg 4
 ; check:     ss5 = emergency_slot 4
--- a/cranelift/filetests/preopt/div_by_const_indirect.cton
+++ b/cranelift/filetests/preopt/div_by_const_indirect.cton
@@ -0,0 +1,60 @@
+
+test preopt
+isa intel baseline
+
+; Cases where the denominator is created by an iconst
+
+function %indir_udiv32(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 7
+    v2 = udiv v0, v1
+    ; check: iconst.i32 7
+    ; check: iconst.i32 0x2492_4925
+    ; check: umulhi v0, v3
+    ; check: isub v0, v4
+    ; check: ushr_imm v5, 1
+    ; check: iadd v6, v4
+    ; check: ushr_imm v7, 2
+    ; check: copy v8
+    return v2
+}
+
+function %indir_sdiv32(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 -17
+    v2 = sdiv v0, v1
+    ; check: iconst.i32 -17
+    ; check: iconst.i32 0xffff_ffff_8787_8787
+    ; check: smulhi v0, v3
+    ; check: sshr_imm v4, 3
+    ; check: ushr_imm v5, 31
+    ; check: iadd v5, v6
+    ; check: copy v7
+    return v2
+}
+
+function %indir_udiv64(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = iconst.i64 1337
+    v2 = udiv v0, v1
+    ; check: iconst.i64 1337
+    ; check: iconst.i64 0xc411_9d95_2866_a139
+    ; check: umulhi v0, v3
+    ; check: ushr_imm v4, 10
+    ; check: copy v5
+    return v2
+}
+
+function %indir_sdiv64(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = iconst.i64 -90210
+    v2 = sdiv v0, v1
+    ; check: iconst.i64 0xffff_ffff_fffe_9f9e
+    ; check: iconst.i64 0xd181_4ee8_939c_b8bb
+    ; check: smulhi v0, v3
+    ; check: sshr_imm v4, 14
+    ; check: ushr_imm v5, 63
+    ; check: iadd v5, v6
+    ; check: copy v7
+    return v2
+}
--- a/cranelift/filetests/preopt/div_by_const_non_power_of_2.cton
+++ b/cranelift/filetests/preopt/div_by_const_non_power_of_2.cton
@@ -0,0 +1,267 @@
+
+test preopt
+isa intel baseline
+
+; -------- U32 --------
+
+; complex case (mul, sub, shift, add, shift)
+function %t_udiv32_p7(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = udiv_imm v0, 7
+    ; check: iconst.i32 0x2492_4925
+    ; check: umulhi v0, v2
+    ; check: isub v0, v3
+    ; check: ushr_imm v4, 1
+    ; check: iadd v5, v3
+    ; check: ushr_imm v6, 2
+    ; check: copy v7
+    return v1
+}
+
+; simple case (mul, shift)
+function %t_udiv32_p125(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = udiv_imm v0, 125
+    ; check: iconst.i32 0x1062_4dd3
+    ; check: umulhi v0, v2
+    ; check: ushr_imm v3, 3
+    ; check: copy v4
+    return v1
+}
+
+; simple case w/ shift by zero (mul)
+function %t_udiv32_p641(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = udiv_imm v0, 641
+    ; check: iconst.i32 0x0066_3d81
+    ; check: umulhi v0, v2
+    ; check: copy v3
+    return v1
+}
+
+
+; -------- S32 --------
+
+; simple case w/ shift by zero (mul, add-sign-bit)
+function %t_sdiv32_n6(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, -6
+    ; check: iconst.i32 0xffff_ffff_d555_5555
+    ; check: smulhi v0, v2
+    ; check: ushr_imm v3, 31
+    ; check: iadd v3, v4
+    ; check: copy v5
+    return v1
+}
+
+; simple case (mul, shift, add-sign-bit)
+function %t_sdiv32_n5(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, -5
+    ; check: iconst.i32 0xffff_ffff_9999_9999
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 1
+    ; check: ushr_imm v4, 31
+    ; check: iadd v4, v5
+    ; check: copy v6
+    return v1
+}
+
+; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
+function %t_sdiv32_n3(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, -3
+    ; check: iconst.i32 0x5555_5555
+    ; check: smulhi v0, v2
+    ; check: isub v3, v0
+    ; check: sshr_imm v4, 1
+    ; check: ushr_imm v5, 31
+    ; check: iadd v5, v6
+    ; check: copy v7
+    return v1
+}
+
+; simple case w/ shift by zero (mul, add-sign-bit)
+function %t_sdiv32_p6(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, 6
+    ; check: iconst.i32 0x2aaa_aaab
+    ; check: smulhi v0, v2
+    ; check: ushr_imm v3, 31
+    ; check: iadd v3, v4
+    ; check: copy v5
+    return v1
+}
+
+; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
+function %t_sdiv32_p7(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, 7
+    ; check: iconst.i32 0xffff_ffff_9249_2493
+    ; check: smulhi v0, v2
+    ; check: iadd v3, v0
+    ; check: sshr_imm v4, 2
+    ; check: ushr_imm v5, 31
+    ; check: iadd v5, v6
+    ; check: copy v7
+    return v1
+}
+
+; simple case (mul, shift, add-sign-bit)
+function %t_sdiv32_p625(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, 625
+    ; check: iconst.i32 0x68db_8bad
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 8
+    ; check: ushr_imm v4, 31
+    ; check: iadd v4, v5
+    ; check: copy v6
+    return v1
+}
+
+
+; -------- U64 --------
+
+; complex case (mul, sub, shift, add, shift)
+function %t_udiv64_p7(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = udiv_imm v0, 7
+    ; check: iconst.i64 0x2492_4924_9249_2493
+    ; check: umulhi v0, v2
+    ; check: isub v0, v3
+    ; check: ushr_imm v4, 1
+    ; check: iadd v5, v3
+    ; check: ushr_imm v6, 2
+    ; check: copy v7
+    return v1
+}
+
+; simple case (mul, shift)
+function %t_udiv64_p9(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = udiv_imm v0, 9
+    ; check: iconst.i64 0xe38e_38e3_8e38_e38f
+    ; check: umulhi v0, v2
+    ; check: ushr_imm v3, 3
+    ; check: copy v4
+    return v1
+}
+
+; complex case (mul, sub, shift, add, shift)
+function %t_udiv64_p125(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = udiv_imm v0, 125
+    ; check: iconst.i64 0x0624_dd2f_1a9f_be77
+    ; check: umulhi v0, v2
+    ; check: isub v0, v3
+    ; check: ushr_imm v4, 1
+    ; check: iadd v5, v3
+    ; check: ushr_imm v6, 6
+    ; check: copy v7
+    return v1
+}
+
+; simple case w/ shift by zero (mul)
+function %t_udiv64_p274177(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = udiv_imm v0, 274177
+    ; check: iconst.i64 0x3d30_f19c_d101
+    ; check: umulhi v0, v2
+    ; check: copy v3
+    return v1
+}
+
+
+; -------- S64 --------
+
+; simple case (mul, shift, add-sign-bit)
+function %t_sdiv64_n625(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -625
+    ; check: iconst.i64 0xcb92_3a29_c779_a6b5
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 7
+    ; check: ushr_imm v4, 63
+    ; check: iadd v4, v5
+    ; check: copy v6
+    return v1
+}
+
+; simple case w/ zero shift (mul, add-sign-bit)
+function %t_sdiv64_n6(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -6
+    ; check: iconst.i64 0xd555_5555_5555_5555
+    ; check: smulhi v0, v2
+    ; check: ushr_imm v3, 63
+    ; check: iadd v3, v4
+    ; check: copy v5
+    return v1
+}
+
+; simple case (mul, shift, add-sign-bit)
+function %t_sdiv64_n5(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -5
+    ; check: iconst.i64 0x9999_9999_9999_9999
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 1
+    ; check: ushr_imm v4, 63
+    ; check: iadd v4, v5
+    ; check: copy v6
+    return v1
+}
+
+; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
+function %t_sdiv64_n3(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -3
+    ; check: iconst.i64 0x5555_5555_5555_5555
+    ; check: smulhi v0, v2
+    ; check: isub v3, v0
+    ; check: sshr_imm v4, 1
+    ; check: ushr_imm v5, 63
+    ; check: iadd v5, v6
+    ; check: copy v7
+    return v1
+}
+
+; simple case w/ zero shift (mul, add-sign-bit)
+function %t_sdiv64_p6(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, 6
+    ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab
+    ; check: smulhi v0, v2
+    ; check: ushr_imm v3, 63
+    ; check: iadd v3, v4
+    ; check: copy v5
+    return v1
+}
+
+; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
+function %t_sdiv64_p15(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, 15
+    ; check: iconst.i64 0x8888_8888_8888_8889
+    ; check: smulhi v0, v2
+    ; check: iadd v3, v0
+    ; check: sshr_imm v4, 3
+    ; check: ushr_imm v5, 63
+    ; check: iadd v5, v6
+    ; check: copy v7
+    return v1
+}
+
+; simple case (mul, shift, add-sign-bit)
+function %t_sdiv64_p625(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, 625
+    ; check: iconst.i64 0x346d_c5d6_3886_594b
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 7
+    ; check: ushr_imm v4, 63
+    ; check: iadd v4, v5
+    ; check: copy v6
+    return v1
+}
--- a/cranelift/filetests/preopt/div_by_const_power_of_2.cton
+++ b/cranelift/filetests/preopt/div_by_const_power_of_2.cton
@@ -0,0 +1,293 @@
+
+test preopt
+isa intel baseline
+
+; -------- U32 --------
+
+; ignored
+function %t_udiv32_p0(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = udiv_imm v0, 0
+    ; check: udiv_imm v0, 0
+    return v1
+}
+
+; converted to a copy
+function %t_udiv32_p1(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = udiv_imm v0, 1
+    ; check: copy v0
+    return v1
+}
+
+; shift
+function %t_udiv32_p2(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = udiv_imm v0, 2
+    ; check: ushr_imm v0, 1
+    return v1
+}
+
+; shift
+function %t_udiv32_p2p31(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = udiv_imm v0, 0x8000_0000
+    ; check: ushr_imm v0, 31
+    return v1
+}
+
+
+; -------- U64 --------
+
+; ignored
+function %t_udiv64_p0(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = udiv_imm v0, 0
+    ; check: udiv_imm v0, 0
+    return v1
+}
+
+; converted to a copy
+function %t_udiv64_p1(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = udiv_imm v0, 1
+    ; check: copy v0
+    return v1
+}
+
+; shift
+function %t_udiv64_p2(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = udiv_imm v0, 2
+    ; check: ushr_imm v0, 1
+    return v1
+}
+
+; shift
+function %t_udiv64_p2p63(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = udiv_imm v0, 0x8000_0000_0000_0000
+    ; check: ushr_imm v0, 63
+    return v1
+}
+
+
+; -------- S32 --------
+
+; ignored
+function %t_sdiv32_p0(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, 0
+    ; check: sdiv_imm v0, 0
+    return v1
+}
+
+; converted to a copy
+function %t_sdiv32_p1(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, 1
+    ; check: copy v0
+    return v1
+}
+
+; ignored
+function %t_sdiv32_n1(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, -1
+    ; check: sdiv_imm v0, -1
+    return v1
+}
+
+; shift
+function %t_sdiv32_p2(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, 2
+    ; check: ushr_imm v0, 31
+    ; check: iadd v0, v2
+    ; check: sshr_imm v3, 1
+    ; check: copy v4
+    return v1
+}
+
+; shift
+function %t_sdiv32_n2(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, -2
+    ; check: ushr_imm v0, 31
+    ; check: iadd v0, v2
+    ; check: sshr_imm v3, 1
+    ; check: irsub_imm v4, 0
+    return v1
+}
+
+; shift
+function %t_sdiv32_p4(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, 4
+    ; check: v2 = sshr_imm v0, 1
+    ; check: ushr_imm v2, 30
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 2
+    ; check: copy v5
+
+    return v1
+}
+
+; shift
+function %t_sdiv32_n4(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, -4
+    ; check: sshr_imm v0, 1
+    ; check: ushr_imm v2, 30
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 2
+    ; check: irsub_imm v5, 0
+    return v1
+}
+
+; shift
+function %t_sdiv32_p2p30(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, 0x4000_0000
+    ; check: sshr_imm v0, 29
+    ; check: ushr_imm v2, 2
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 30
+    ; check: copy v5
+    return v1
+}
+
+; shift
+function %t_sdiv32_n2p30(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, -0x4000_0000
+    ; check: sshr_imm v0, 29
+    ; check: ushr_imm v2, 2
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 30
+    ; check: irsub_imm v5, 0
+    return v1
+}
+
+; there's no positive version of this, since -(-0x8000_0000) isn't
+; representable.
+function %t_sdiv32_n2p31(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = sdiv_imm v0, -0x8000_0000
+    ; check: sshr_imm v0, 30
+    ; check: ushr_imm v2, 1
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 31
+    ; check: irsub_imm v5, 0
+    return v1
+}
+
+
+; -------- S64 --------
+
+; ignored
+function %t_sdiv64_p0(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, 0
+    ; check: sdiv_imm v0, 0
+    return v1
+}
+
+; converted to a copy
+function %t_sdiv64_p1(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, 1
+    ; check: copy v0
+    return v1
+}
+
+; ignored
+function %t_sdiv64_n1(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -1
+    ; check: sdiv_imm v0, -1
+    return v1
+}
+
+; shift
+function %t_sdiv64_p2(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, 2
+    ; check: ushr_imm v0, 63
+    ; check: iadd v0, v2
+    ; check: sshr_imm v3, 1
+    ; check: copy v4
+    return v1
+}
+
+; shift
+function %t_sdiv64_n2(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -2
+    ; check: ushr_imm v0, 63
+    ; check: iadd v0, v2
+    ; check: sshr_imm v3, 1
+    ; check: irsub_imm v4, 0
+    return v1
+}
+
+; shift
+function %t_sdiv64_p4(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, 4
+    ; check: sshr_imm v0, 1
+    ; check: ushr_imm v2, 62
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 2
+    ; check: copy v5
+    return v1
+}
+
+; shift
+function %t_sdiv64_n4(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -4
+    ; check: sshr_imm v0, 1
+    ; check: ushr_imm v2, 62
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 2
+    ; check: irsub_imm v5, 0
+    return v1
+}
+
+; shift
+function %t_sdiv64_p2p62(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, 0x4000_0000_0000_0000
+    ; check: sshr_imm v0, 61
+    ; check: ushr_imm v2, 2
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 62
+    ; check: copy v5
+    return v1
+}
+
+; shift
+function %t_sdiv64_n2p62(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -0x4000_0000_0000_0000
+    ; check: sshr_imm v0, 61
+    ; check: ushr_imm v2, 2
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 62
+    ; check: irsub_imm v5, 0
+    return v1
+}
+
+; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't
+; representable.
+function %t_sdiv64_n2p63(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = sdiv_imm v0, -0x8000_0000_0000_0000
+    ; check: sshr_imm v0, 62
+    ; check: ushr_imm v2, 1
+    ; check: iadd v0, v3
+    ; check: sshr_imm v4, 63
+    ; check: irsub_imm v5, 0
+    return v1
+}
--- a/cranelift/filetests/preopt/rem_by_const_non_power_of_2.cton
+++ b/cranelift/filetests/preopt/rem_by_const_non_power_of_2.cton
@@ -0,0 +1,286 @@
+
+test preopt
+isa intel baseline
+
+; -------- U32 --------
+
+; complex case (mul, sub, shift, add, shift)
+function %t_urem32_p7(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = urem_imm v0, 7
+    ; check: iconst.i32 0x2492_4925
+    ; check: umulhi v0, v2
+    ; check: isub v0, v3
+    ; check: ushr_imm v4, 1
+    ; check: iadd v5, v3
+    ; check: ushr_imm v6, 2
+    ; check: imul_imm v7, 7
+    ; check: isub v0, v8
+    return v1
+}
+
+; simple case (mul, shift)
+function %t_urem32_p125(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = urem_imm v0, 125
+    ; check: iconst.i32 0x1062_4dd3
+    ; check: umulhi v0, v2
+    ; check: ushr_imm v3, 3
+    ; check: imul_imm v4, 125
+    ; check: isub v0, v5
+    return v1
+}
+
+; simple case w/ shift by zero (mul)
+function %t_urem32_p641(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = urem_imm v0, 641
+    ; check: iconst.i32 0x0066_3d81
+    ; check: umulhi v0, v2
+    ; check: imul_imm v3, 641
+    ; check: isub v0, v4
+    return v1
+}
+
+
+; -------- S32 --------
+
+; simple case w/ shift by zero (mul, add-sign-bit)
+function %t_srem32_n6(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, -6
+    ; check: iconst.i32 0xffff_ffff_d555_5555
+    ; check: smulhi v0, v2
+    ; check: ushr_imm v3, 31
+    ; check: iadd v3, v4
+    ; check: imul_imm v5, -6
+    ; check: isub v0, v6
+    return v1
+}
+
+; simple case (mul, shift, add-sign-bit)
+function %t_srem32_n5(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, -5
+    ; check: iconst.i32 0xffff_ffff_9999_9999
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 1
+    ; check: ushr_imm v4, 31
+    ; check: iadd v4, v5
+    ; check: imul_imm v6, -5
+    ; check: isub v0, v7
+    return v1
+}
+
+; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
+function %t_srem32_n3(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, -3
+    ; check: iconst.i32 0x5555_5555
+    ; check: smulhi v0, v2
+    ; check: isub v3, v0
+    ; check: sshr_imm v4, 1
+    ; check: ushr_imm v5, 31
+    ; check: iadd v5, v6
+    ; check: imul_imm v7, -3
+    ; check: isub v0, v8
+    return v1
+}
+
+; simple case w/ shift by zero (mul, add-sign-bit)
+function %t_srem32_p6(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, 6
+    ; check: iconst.i32 0x2aaa_aaab
+    ; check: smulhi v0, v2
+    ; check: ushr_imm v3, 31
+    ; check: iadd v3, v4
+    ; check: imul_imm v5, 6
+    ; check: isub v0, v6
+    return v1
+}
+
+; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
+function %t_srem32_p7(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, 7
+    ; check: iconst.i32 0xffff_ffff_9249_2493
+    ; check: smulhi v0, v2
+    ; check: iadd v3, v0
+    ; check: sshr_imm v4, 2
+    ; check: ushr_imm v5, 31
+    ; check: iadd v5, v6
+    ; check: imul_imm v7, 7
+    ; check: isub v0, v8
+    return v1
+}
+
+; simple case (mul, shift, add-sign-bit)
+function %t_srem32_p625(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, 625
+    ; check: iconst.i32 0x68db_8bad
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 8
+    ; check: ushr_imm v4, 31
+    ; check: iadd v4, v5
+    ; check: imul_imm v6, 625
+    ; check: isub v0, v7
+    return v1
+}
+
+
+; -------- U64 --------
+
+; complex case (mul, sub, shift, add, shift)
+function %t_urem64_p7(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = urem_imm v0, 7
+    ; check: umulhi v0, v2
+    ; check: isub v0, v3
+    ; check: ushr_imm v4, 1
+    ; check: iadd v5, v3
+    ; check: ushr_imm v6, 2
+    ; check: imul_imm v7, 7
+    ; check: isub v0, v8
+    return v1
+}
+
+; simple case (mul, shift)
+function %t_urem64_p9(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = urem_imm v0, 9
+    ; check: iconst.i64 0xe38e_38e3_8e38_e38f
+    ; check: umulhi v0, v2
+    ; check: ushr_imm v3, 3
+    ; check: imul_imm v4, 9
+    ; check: isub v0, v5
+    return v1
+}
+
+; complex case (mul, sub, shift, add, shift)
+function %t_urem64_p125(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = urem_imm v0, 125
+    ; check: iconst.i64 0x0624_dd2f_1a9f_be77
+    ; check: umulhi v0, v2
+    ; check: isub v0, v3
+    ; check: ushr_imm v4, 1
+    ; check: iadd v5, v3
+    ; check: ushr_imm v6, 6
+    ; check: imul_imm v7, 125
+    ; check: isub v0, v8
+    return v1
+}
+
+; simple case w/ shift by zero (mul)
+function %t_urem64_p274177(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = urem_imm v0, 274177
+    ; check: iconst.i64 0x3d30_f19c_d101
+    ; check: umulhi v0, v2
+    ; check: imul_imm v3, 0x0004_2f01
+    ; check: isub v0, v4
+    return v1
+}
+
+
+; -------- S64 --------
+
+; simple case (mul, shift, add-sign-bit)
+function %t_srem64_n625(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -625
+    ; check: iconst.i64 0xcb92_3a29_c779_a6b5
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 7
+    ; check: ushr_imm v4, 63
+    ; check: iadd v4, v5
+    ; check: imul_imm v6, -625
+    ; check: isub v0, v7
+    return v1
+}
+
+; simple case w/ zero shift (mul, add-sign-bit)
+function %t_srem64_n6(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -6
+    ; check: iconst.i64 0xd555_5555_5555_5555
+    ; check: smulhi v0, v2
+    ; check: ushr_imm v3, 63
+    ; check: iadd v3, v4
+    ; check: imul_imm v5, -6
+    ; check: isub v0, v6
+    return v1
+}
+
+; simple case (mul, shift, add-sign-bit)
+function %t_srem64_n5(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -5
+    ; check: iconst.i64 0x9999_9999_9999_9999
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 1
+    ; check: ushr_imm v4, 63
+    ; check: iadd v4, v5
+    ; check: imul_imm v6, -5
+    ; check: isub v0, v7
+    return v1
+}
+
+; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit)
+function %t_srem64_n3(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -3
+    ; check: iconst.i64 0x5555_5555_5555_5555
+    ; check: smulhi v0, v2
+    ; check: isub v3, v0
+    ; check: sshr_imm v4, 1
+    ; check: ushr_imm v5, 63
+    ; check: iadd v5, v6
+    ; check: imul_imm v7, -3
+    ; check: isub v0, v8
+    return v1
+}
+
+; simple case w/ zero shift (mul, add-sign-bit)
+function %t_srem64_p6(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, 6
+    ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab
+    ; check: smulhi v0, v2
+    ; check: ushr_imm v3, 63
+    ; check: iadd v3, v4
+    ; check: imul_imm v5, 6
+    ; check: isub v0, v6
+    return v1
+}
+
+; case d > 0 && M < 0 (mul, add, shift, add-sign-bit)
+function %t_srem64_p15(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, 15
+    ; check: iconst.i64 0x8888_8888_8888_8889
+    ; check: smulhi v0, v2
+    ; check: iadd v3, v0
+    ; check: sshr_imm v4, 3
+    ; check: ushr_imm v5, 63
+    ; check: iadd v5, v6
+    ; check: imul_imm v7, 15
+    ; check: isub v0, v8
+    return v1
+}
+
+; simple case (mul, shift, add-sign-bit)
+function %t_srem64_p625(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, 625
+    ; check: iconst.i64 0x346d_c5d6_3886_594b
+    ; check: smulhi v0, v2
+    ; check: sshr_imm v3, 7
+    ; check: ushr_imm v4, 63
+    ; check: iadd v4, v5
+    ; check: imul_imm v6, 625
+    ; check: isub v0, v7
+    return v1
+}
--- a/cranelift/filetests/preopt/rem_by_const_power_of_2.cton
+++ b/cranelift/filetests/preopt/rem_by_const_power_of_2.cton
@@ -0,0 +1,292 @@
+
+test preopt
+isa intel baseline
+
+; -------- U32 --------
+
+; ignored
+function %t_urem32_p0(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = urem_imm v0, 0
+    ; check: urem_imm v0, 0
+    return v1
+}
+
+; converted to constant zero
+function %t_urem32_p1(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = urem_imm v0, 1
+    ; check: iconst.i32 0
+    return v1
+}
+
+; converted to a mask
+function %t_urem32_p2(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = urem_imm v0, 2
+    ; check: band_imm v0, 1
+    return v1
+}
+
+; converted to a mask
+function %t_urem32_p2p31(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = urem_imm v0, 0x8000_0000
+    ; check: band_imm v0, 0x7fff_ffff
+    return v1
+}
+
+
+; -------- U64 --------
+
+; ignored
+function %t_urem64_p0(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = urem_imm v0, 0
+    ; check: urem_imm v0, 0
+    return v1
+}
+
+; converted to constant zero
+function %t_urem64_p1(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = urem_imm v0, 1
+    ; check: iconst.i64 0
+    return v1
+}
+
+; converted to a mask
+function %t_urem64_p2(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = urem_imm v0, 2
+    ; check: band_imm v0, 1
+    return v1
+}
+
+; converted to a mask
+function %t_urem64_p2p63(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = urem_imm v0, 0x8000_0000_0000_0000
+    ; check: band_imm v0, 0x7fff_ffff_ffff_ffff
+    return v1
+}
+
+
+; -------- S32 --------
+
+; ignored
+function %t_srem32_n1(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, -1
+    ; check: srem_imm v0, -1
+    return v1
+}
+
+; ignored
+function %t_srem32_p0(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, 0
+    ; check: srem_imm v0, 0
+    return v1
+}
+
+; converted to constant zero
+function %t_srem32_p1(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, 1
+    ; check: iconst.i32 0
+    return v1
+}
+
+; shift
+function %t_srem32_p2(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, 2
+    ; check: ushr_imm v0, 31
+    ; check: iadd v0, v2
+    ; check: band_imm v3, -2
+    ; check: isub v0, v4
+    return v1
+}
+
+; shift
+function %t_srem32_n2(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, -2
+    ; check: ushr_imm v0, 31
+    ; check: iadd v0, v2
+    ; check: band_imm v3, -2
+    ; check: isub v0, v4
+    return v1
+}
+
+; shift
+function %t_srem32_p4(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, 4
+    ; check: sshr_imm v0, 1
+    ; check: ushr_imm v2, 30
+    ; check: iadd v0, v3
+    ; check: band_imm v4, -4
+    ; check: isub v0, v5
+    return v1
+}
+
+; shift
+function %t_srem32_n4(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, -4
+    ; check: sshr_imm v0, 1
+    ; check: ushr_imm v2, 30
+    ; check: iadd v0, v3
+    ; check: band_imm v4, -4
+    ; check: isub v0, v5
+    return v1
+}
+
+; shift
+function %t_srem32_p2p30(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, 0x4000_0000
+    ; check: sshr_imm v0, 29
+    ; check: ushr_imm v2, 2
+    ; check: iadd v0, v3
+    ; check: band_imm v4, 0xffff_ffff_c000_0000
+    ; check: isub v0, v5
+    return v1
+}
+
+; shift
+function %t_srem32_n2p30(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, -0x4000_0000
+    ; check: sshr_imm v0, 29
+    ; check: ushr_imm v2, 2
+    ; check: iadd v0, v3
+    ; check: band_imm v4, 0xffff_ffff_c000_0000
+    ; check: isub v0, v5
+    return v1
+}
+
+; there's no positive version of this, since -(-0x8000_0000) isn't
+; representable.
+function %t_srem32_n2p31(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = srem_imm v0, -0x8000_0000
+    ; check: sshr_imm v0, 30
+    ; check: ushr_imm v2, 1
+    ; check: iadd v0, v3
+    ; check: band_imm v4, 0xffff_ffff_8000_0000
+    ; check: isub v0, v5
+    return v1
+}
+
+
+; -------- S64 --------
+
+; ignored
+function %t_srem64_n1(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -1
+    ; check: srem_imm v0, -1
+    return v1
+}
+
+; ignored
+function %t_srem64_p0(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, 0
+    ; check: srem_imm v0, 0
+    return v1
+}
+
+; converted to constant zero
+function %t_srem64_p1(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, 1
+    ; check: iconst.i64 0
+    return v1
+}
+
+; shift
+function %t_srem64_p2(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, 2
+    ; check: ushr_imm v0, 63
+    ; check: iadd v0, v2
+    ; check: band_imm v3, -2
+    ; check: isub v0, v4
+    return v1
+}
+
+; shift
+function %t_srem64_n2(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -2
+    ; check: ushr_imm v0, 63
+    ; check: iadd v0, v2
+    ; check: band_imm v3, -2
+    ; check: isub v0, v4
+    return v1
+}
+
+; shift
+function %t_srem64_p4(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, 4
+    ; check: sshr_imm v0, 1
+    ; check: ushr_imm v2, 62
+    ; check: iadd v0, v3
+    ; check: band_imm v4, -4
+    ; check: isub v0, v5
+    return v1
+}
+
+; shift
+function %t_srem64_n4(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -4
+    ; check: sshr_imm v0, 1
+    ; check: ushr_imm v2, 62
+    ; check: iadd v0, v3
+    ; check: band_imm v4, -4
+    ; check: isub v0, v5
+    return v1
+}
+
+; shift
+function %t_srem64_p2p62(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, 0x4000_0000_0000_0000
+    ; check: sshr_imm v0, 61
+    ; check: ushr_imm v2, 2
+    ; check: iadd v0, v3
+    ; check: band_imm v4, 0xc000_0000_0000_0000
+    ; check: isub v0, v5
+    return v1
+}
+
+; shift
+function %t_srem64_n2p62(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -0x4000_0000_0000_0000
+    ; check: sshr_imm v0, 61
+    ; check: ushr_imm v2, 2
+    ; check: iadd v0, v3
+    ; check: band_imm v4, 0xc000_0000_0000_0000
+    ; check: isub v0, v5
+    return v1
+}
+
+; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't
+; representable.
+function %t_srem64_n2p63(i64) -> i64 {
+ebb0(v0: i64):
+    v1 = srem_imm v0, -0x8000_0000_0000_0000
+    ; check: sshr_imm v0, 62
+    ; check: ushr_imm v2, 1
+    ; check: iadd v0, v3
+    ; check: band_imm v4, 0x8000_0000_0000_0000
+    ; check: isub v0, v5
+    return v1
+}
--- a/cranelift/format-all.sh
+++ b/cranelift/format-all.sh
@@ -1,10 +1,8 @@
 #!/bin/bash
+set -euo pipefail

 # Format all sources using rustfmt.

-# Exit immediately on errors.
-set -e
-
 cd $(dirname "$0")

 # Make sure we can find rustfmt.
--- a/cranelift/publish-all.sh
+++ b/cranelift/publish-all.sh
@@ -1,11 +1,11 @@
 #!/bin/bash
-set -e
+set -euo pipefail
 cd $(dirname "$0")
-topdir=$(pwd)
+topdir="$(pwd)"

 # All the cretonne-* crates have the same version number
 # The filecheck crate version is managed independently.
-version="0.1.0"
+version="0.3.4"

 # Update all of the Cargo.toml files.
 #
@@ -16,9 +16,9 @@ for crate in . lib/*; do
        continue
    fi
    # Update the version number of this crate to $version.
-    sed -i "" -e "s/^version = .*/version = \"$version\"/" $crate/Cargo.toml
+    sed -i.bk -e "s/^version = .*/version = \"$version\"/" "$crate/Cargo.toml"
    # Update the required version number of any cretonne* dependencies.
-    sed -i "" -e "/^cretonne/s/version = \"[^\"]*\"/version = \"$version\"/" $crate/Cargo.toml
+    sed -i.bk -e "/^cretonne/s/version = \"[^\"]*\"/version = \"$version\"/" "$crate/Cargo.toml"
 done

 # Update our local Cargo.lock (not checked in).
@@ -29,6 +29,10 @@ cargo update
 #
 # Note that libraries need to be published in topological order.

+echo git commit -a -m "\"Bump version to $version"\"
+echo git push
 for crate in filecheck cretonne frontend native reader wasm; do
-    echo cargo publish --manifest-path lib/$crate/Cargo.toml
+    echo cargo publish --manifest-path "lib/$crate/Cargo.toml"
 done
+echo
+echo Then, go to https://github.com/Cretonne/cretonne/releases/ and define a new release.
--- a/cranelift/src/cat.rs
+++ b/cranelift/src/cat.rs
@@ -13,7 +13,7 @@ use filetest::subtest::{self, SubTest, Context, Result as STResult};
 pub fn run(files: Vec<String>) -> CommandResult {
    for (i, f) in files.into_iter().enumerate() {
        if i != 0 {
-            println!("");
+            println!();
        }
        cat_one(f)?
    }
@@ -30,7 +30,7 @@ fn cat_one(filename: String) -> CommandResult {

    for (idx, func) in items.into_iter().enumerate() {
        if idx != 0 {
-            println!("");
+            println!();
        }
        print!("{}", func);
    }
--- a/cranelift/src/compile.rs
+++ b/cranelift/src/compile.rs
@@ -6,9 +6,45 @@ use cton_reader::parse_test;
 use std::path::PathBuf;
 use cretonne::Context;
 use cretonne::settings::FlagsOrIsa;
+use cretonne::{binemit, ir};
 use std::path::Path;
 use utils::{pretty_error, read_to_string, parse_sets_and_isa};

+/// Relocation sink handed to `emit_to_memory` by this command.
+///
+/// `flag_print` controls whether each reported relocation is written to stdout.
+struct PrintRelocs {
+    flag_print: bool,
+}
+
+// Each callback prints one line describing the relocation when `flag_print` is set and does
+// nothing otherwise. The line is prefixed with the name of the callback that produced it so
+// the three kinds of relocation can be told apart in the output.
+impl binemit::RelocSink for PrintRelocs {
+    // Report an EBB relocation `r` with target `offset`, located at `where_`.
+    fn reloc_ebb(
+        &mut self,
+        where_: binemit::CodeOffset,
+        r: binemit::Reloc,
+        offset: binemit::CodeOffset,
+    ) {
+        if self.flag_print {
+            println!("reloc_ebb: {} {} at {}", r, offset, where_);
+        }
+    }
+
+    // Report a relocation `r` against the external `name` plus `addend`, located at `where_`.
+    fn reloc_external(
+        &mut self,
+        where_: binemit::CodeOffset,
+        r: binemit::Reloc,
+        name: &ir::ExternalName,
+        addend: binemit::Addend,
+    ) {
+        if self.flag_print {
+            println!("reloc_external: {} {} {} at {}", r, name, addend, where_);
+        }
+    }
+
+    // Report a relocation `r` against the jump table `jt`, located at `where_`.
+    fn reloc_jt(&mut self, where_: binemit::CodeOffset, r: binemit::Reloc, jt: ir::JumpTable) {
+        if self.flag_print {
+            println!("reloc_jt: {} {} at {}", r, jt, where_);
+        }
+    }
+}
+
 pub fn run(
    files: Vec<String>,
    flag_print: bool,
@@ -37,7 +73,7 @@ fn handle_module(
    let test_file = parse_test(&buffer).map_err(|e| format!("{}: {}", name, e))?;

    // If we have an isa from the command-line, use that. Otherwise if the
-    // file contins a unique isa, use that.
+    // file contains a unique isa, use that.
    let isa = if let Some(isa) = fisa.isa {
        isa
    } else if let Some(isa) = test_file.isa_spec.unique_isa() {
@@ -49,12 +85,32 @@ fn handle_module(
    for (func, _) in test_file.functions {
        let mut context = Context::new();
        context.func = func;
-        context.compile(isa).map_err(|err| {
+        let size = context.compile(isa).map_err(|err| {
            pretty_error(&context.func, Some(isa), err)
        })?;
        if flag_print {
            println!("{}", context.func.display(isa));
        }
+
+        // Encode the result as machine code.
+        let mut mem = Vec::new();
+        let mut relocs = PrintRelocs { flag_print };
+        mem.resize(size as usize, 0);
+        context.emit_to_memory(mem.as_mut_ptr(), &mut relocs, &*isa);
+
+        if flag_print {
+            print!(".byte ");
+            let mut first = true;
+            for byte in &mem {
+                if first {
+                    first = false;
+                } else {
+                    print!(", ");
+                }
+                print!("{}", byte);
+            }
+            println!();
+        }
    }

    Ok(())
--- a/cranelift/src/filetest/binemit.rs
+++ b/cranelift/src/filetest/binemit.rs
@@ -126,7 +126,7 @@ impl SubTest for TestBinEmit {
        // Fix the stack frame layout so we can test spill/fill encodings.
        let min_offset = func.stack_slots
            .keys()
-            .map(|ss| func.stack_slots[ss].offset)
+            .map(|ss| func.stack_slots[ss].offset.unwrap())
            .min();
        func.stack_slots.frame_size = min_offset.map(|off| (-off) as u32);

@@ -271,14 +271,13 @@ impl SubTest for TestBinEmit {
                                "No encodings found for: {}",
                                func.dfg.display_inst(inst, isa)
                            ));
-                        } else {
+                        }
                        return Err(format!(
                                "No matching encodings for {} in {}",
                                func.dfg.display_inst(inst, isa),
                                DisplayList(&encodings),
                            ));
                    }
-                    }
                    let have = sink.text.trim();
                    if have != want {
                        return Err(format!(
--- a/cranelift/src/filetest/concurrent.rs
+++ b/cranelift/src/filetest/concurrent.rs
@@ -119,7 +119,7 @@ fn worker_thread(
            loop {
                // Lock the mutex only long enough to extract a request.
                let Request(jobid, path) = match requests.lock().unwrap().recv() {
-                    Err(..) => break, // TX end shuit down. exit thread.
+                    Err(..) => break, // TX end shut down. exit thread.
                    Ok(req) => req,
                };

--- a/cranelift/src/filetest/mod.rs
+++ b/cranelift/src/filetest/mod.rs
@@ -19,6 +19,7 @@ mod concurrent;
 mod domtree;
 mod legalizer;
 mod licm;
+mod preopt;
 mod regalloc;
 mod runner;
 mod runone;
@@ -64,6 +65,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
        "domtree" => domtree::subtest(parsed),
        "legalizer" => legalizer::subtest(parsed),
        "licm" => licm::subtest(parsed),
+        "preopt" => preopt::subtest(parsed),
        "print-cfg" => print_cfg::subtest(parsed),
        "regalloc" => regalloc::subtest(parsed),
        "simple-gvn" => simple_gvn::subtest(parsed),
--- a/cranelift/src/filetest/preopt.rs
+++ b/cranelift/src/filetest/preopt.rs
@@ -0,0 +1,50 @@
+//! Test command for testing the preopt pass.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use cretonne::ir::Function;
+use cretonne;
+use cton_reader::TestCommand;
+use filetest::subtest::{SubTest, Context, Result, run_filecheck};
+use std::borrow::Cow;
+use std::fmt::Write;
+use utils::pretty_error;
+
+struct TestPreopt;
+
+/// Build the `preopt` subtest from a parsed test command.
+///
+/// The command must be `preopt`; any option on the command line is rejected with an error.
+pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
+    assert_eq!(parsed.command, "preopt");
+    if parsed.options.is_empty() {
+        return Ok(Box::new(TestPreopt));
+    }
+    Err(format!("No options allowed on {}", parsed))
+}
+
+// `SubTest` implementation that runs the preopt pass on a function and sends the printed
+// result to `filecheck`.
+impl SubTest for TestPreopt {
+    // The name of this subtest: "preopt".
+    fn name(&self) -> Cow<str> {
+        Cow::from("preopt")
+    }
+
+    // Always reports `true` for this subtest.
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    // Run preopt on `func` and pass the textual form of the result to `run_filecheck`.
+    //
+    // Panics if `context.isa` is `None`. A failure from `preopt` is formatted with
+    // `pretty_error` and returned as the error.
+    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
+        // Create a compilation context, and drop in the function.
+        let mut comp_ctx = cretonne::Context::new();
+        comp_ctx.func = func.into_owned();
+        let isa = context.isa.expect("preopt needs an ISA");
+
+        // `flowgraph()` is called before the pass under test; presumably it computes the
+        // control flow graph that `preopt` relies on (TODO confirm).
+        comp_ctx.flowgraph();
+        comp_ctx.preopt(isa).map_err(|e| {
+            pretty_error(&comp_ctx.func, context.isa, Into::into(e))
+        })?;
+
+        // Print the transformed function into a string; a formatting error becomes the
+        // returned error message.
+        let mut text = String::new();
+        write!(&mut text, "{}", &comp_ctx.func).map_err(
+            |e| e.to_string(),
+        )?;
+        run_filecheck(&text, context)
+    }
+}
--- a/cranelift/src/filetest/runner.rs
+++ b/cranelift/src/filetest/runner.rs
@@ -221,7 +221,7 @@ impl TestRunner {
        }
        self.tests[jobid].state = State::Done(result);

-        // Rports jobs in order.
+        // Reports jobs in order.
        while self.report_job() {
            self.reported_tests += 1;
        }
--- a/cranelift/src/print_cfg.rs
+++ b/cranelift/src/print_cfg.rs
@@ -17,7 +17,7 @@ use utils::read_to_string;
 pub fn run(files: Vec<String>) -> CommandResult {
    for (i, f) in files.into_iter().enumerate() {
        if i != 0 {
-            println!("");
+            println!();
        }
        print_cfg(f)?
    }
@@ -100,7 +100,7 @@ fn print_cfg(filename: String) -> CommandResult {

    for (idx, func) in items.into_iter().enumerate() {
        if idx != 0 {
-            println!("");
+            println!();
        }
        print!("{}", CFGPrinter::new(&func));
    }
--- a/cranelift/test-all.sh
+++ b/cranelift/test-all.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -euo pipefail

 # This is the top-level test script:
 #
@@ -10,8 +11,9 @@
 #
 # All tests run by this script should be passing at all times.

-# Exit immediately on errors.
-set -e
+# Disable generation of .pyc files because they cause trouble for vendoring
+# scripts, and this is a build step that isn't run very often anyway.
+export PYTHONDONTWRITEBYTECODE=1

 # Repository top-level directory.
 cd $(dirname "$0")
--- a/cranelift/wasmtests/unreachable_code.wat
+++ b/cranelift/wasmtests/unreachable_code.wat
@@ -0,0 +1,77 @@
+(module
+  (type (;0;) (func (param i32 i64 f64) (result f64)))
+  (type (;1;) (func))
+  (type (;2;) (func (result f32)))
+  (type (;3;) (func (result f64)))
+  (type (;4;) (func (param f64 f64) (result f64)))
+  (type (;5;) (func (result i32)))
+  (func (result i32)
+      block (result i32)
+        unreachable
+      end
+      block
+      end
+      i32.clz
+  )
+  (func (result i32)
+      loop (result i32)
+        unreachable
+      end
+      block
+      end
+      i32.clz
+  )
+  (func (;0;) (type 5) (result i32)
+    nop
+    block (result i32)  ;; label = @1
+      block  ;; label = @2
+        block  ;; label = @3
+          nop
+          block  ;; label = @4
+            i32.const 1
+            if  ;; label = @5
+              nop
+              block  ;; label = @6
+                nop
+                nop
+                loop (result i32)  ;; label = @7
+                  nop
+                  block (result i32)  ;; label = @8
+                    nop
+                    nop
+                    block (result i32)  ;; label = @9
+                      nop
+                      unreachable
+                    end
+                  end
+                end
+                block (result i32)  ;; label = @7
+                  block  ;; label = @8
+                    nop
+                  end
+                  i32.const 0
+                end
+                br_if 5 (;@1;)
+                drop
+              end
+            else
+              nop
+            end
+            nop
+          end
+        end
+      end
+      unreachable
+    end)
+  (func
+    block (result i32)
+      block (result i32)
+        i32.const 1
+        br 1
+      end
+    end
+    drop
+  )
+  (table (;0;) 16 anyfunc)
+  (elem (i32.const 0))
+)
--- a/lib/cretonne/Cargo.toml
+++ b/lib/cretonne/Cargo.toml
@@ -1,12 +1,13 @@
 [package]
 authors = ["The Cretonne Project Developers"]
 name = "cretonne"
-version = "0.1.0"
+version = "0.3.4"
 description = "Low-level code generator library"
 license = "Apache-2.0"
 documentation = "https://cretonne.readthedocs.io/"
 repository = "https://github.com/Cretonne/cretonne"
 readme = "README.md"
+keywords = [ "compile", "compiler", "jit" ]
 build = "build.rs"

 [lib]
--- a/lib/cretonne/build.rs
+++ b/lib/cretonne/build.rs
@@ -46,7 +46,7 @@ fn main() {
    let cur_dir = env::current_dir().expect("Can't access current working directory");
    let crate_dir = cur_dir.as_path();

-    // Make sure we rebuild is this build script changes.
+    // Make sure we rebuild if this build script changes.
    // I guess that won't happen if you have non-UTF8 bytes in your path names.
    // The `build.py` script prints out its own dependencies.
    println!(
@@ -59,8 +59,11 @@ fn main() {
    let build_script = meta_dir.join("build.py");

    // Launch build script with Python. We'll just find python in the path.
+    // Use -B to disable .pyc files, because they cause trouble for vendoring
+    // scripts, and this is a build step that isn't run very often anyway.
    let status = process::Command::new("python")
        .current_dir(crate_dir)
+        .arg("-B")
        .arg(build_script)
        .arg("--out-dir")
        .arg(out_dir)
--- a/lib/cretonne/meta/base/instructions.py
+++ b/lib/cretonne/meta/base/instructions.py
@@ -833,6 +833,26 @@ imul = Instruction(
        """,
        ins=(x, y), outs=a)

+umulhi = Instruction(
+        'umulhi', r"""
+        Unsigned integer multiplication, producing the high half of a
+        double-length result.
+
+        Polymorphic over all scalar integer types, but does not support vector
+        types.
+        """,
+        ins=(x, y), outs=a)
+
+smulhi = Instruction(
+        'smulhi', """
+        Signed integer multiplication, producing the high half of a
+        double-length result.
+
+        Polymorphic over all scalar integer types, but does not support vector
+        types.
+        """,
+        ins=(x, y), outs=a)
+
 udiv = Instruction(
        'udiv', r"""
        Unsigned integer division: :math:`a := \lfloor {x \over y} \rfloor`.
--- a/lib/cretonne/meta/base/settings.py
+++ b/lib/cretonne/meta/base/settings.py
@@ -56,7 +56,11 @@ avoid_div_traps = BoolSetting(
 is_compressed = BoolSetting("Enable compressed instructions")

 enable_float = BoolSetting(
-        """Enable the use of floating-point instructions""",
+        """
+        Enable the use of floating-point instructions
+
+        Disabling use of floating-point instructions is not yet implemented.
+        """,
        default=True)

 enable_simd = BoolSetting(
--- a/lib/cretonne/meta/check.sh
+++ b/lib/cretonne/meta/check.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-set -e
+set -euo pipefail
 cd $(dirname "$0")

 runif() {
--- a/lib/cretonne/meta/gen_instr.py
+++ b/lib/cretonne/meta/gen_instr.py
@@ -211,7 +211,7 @@ def gen_instruction_data_impl(fmt):
                    if f.has_value_list:
                        fmt.line(n + ' { ref mut args, .. } => args,')
                fmt.line('_ => panic!("No value list: {:?}", self),')
-            fmt.line('assert!(args.is_empty(), "Value list already in use");')
+            fmt.line('debug_assert!(args.is_empty(), "Value list already in use");')
            fmt.line('*args = vlist;')


--- a/lib/cretonne/meta/gen_legalizer.py
+++ b/lib/cretonne/meta/gen_legalizer.py
@@ -348,7 +348,8 @@ def gen_xform(xform, fmt, type_sets):
        # Delete the original instruction if we didn't have an opportunity to
        # replace it.
        if not replace_inst:
-            fmt.line('assert_eq!(pos.remove_inst(), inst);')
+            fmt.line('let removed = pos.remove_inst();')
+            fmt.line('debug_assert_eq!(removed, inst);')
        fmt.line('return true;')


--- a/lib/cretonne/meta/gen_settings.py
+++ b/lib/cretonne/meta/gen_settings.py
@@ -245,7 +245,7 @@ def gen_constructor(sgrp, parent, fmt):
                'pub fn new({}) -> Flags {{'.format(args), '}'):
            fmt.line('let bvec = builder.state_for("{}");'.format(sgrp.name))
            fmt.line('let mut bytes = [0; {}];'.format(sgrp.byte_size()))
-            fmt.line('assert_eq!(bvec.len(), {});'.format(sgrp.settings_size))
+            fmt.line('debug_assert_eq!(bvec.len(), {});'.format(sgrp.settings_size))
            with fmt.indented(
                    'for (i, b) in bvec.iter().enumerate() {', '}'):
                fmt.line('bytes[i] = *b;')
--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -120,6 +120,9 @@ enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf)
 enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7)
 enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)

+enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5)
+enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4)
+
 enc_i32_i64(base.copy, r.umr, 0x89)
 enc_both(base.copy.b1, r.umr, 0x89)
 enc_i32_i64(base.regmove, r.rmov, 0x89)
@@ -403,9 +406,55 @@ I64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))

 # Numerical conversions.

-# Converting i64 to i32 is a no-op in 64-bit mode.
+# Reducing an integer is a no-op.
+I32.enc(base.ireduce.i8.i32, r.null, 0)
+I32.enc(base.ireduce.i16.i32, r.null, 0)
+I64.enc(base.ireduce.i8.i32, r.null, 0)
+I64.enc(base.ireduce.i16.i32, r.null, 0)
+I64.enc(base.ireduce.i8.i64, r.null, 0)
+I64.enc(base.ireduce.i16.i64, r.null, 0)
 I64.enc(base.ireduce.i32.i64, r.null, 0)
+
+# TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
+# instructions for %al/%ax/%eax to %ax/%eax/%rax.
+
+# movsbl
+I32.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
+I64.enc(base.sextend.i32.i8, *r.urm.rex(0x0f, 0xbe))
+I64.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
+
+# movswl
+I32.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
+I64.enc(base.sextend.i32.i16, *r.urm.rex(0x0f, 0xbf))
+I64.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
+
+# movsbq
+I64.enc(base.sextend.i64.i8, *r.urm.rex(0x0f, 0xbe, w=1))
+
+# movswq
+I64.enc(base.sextend.i64.i16, *r.urm.rex(0x0f, 0xbf, w=1))
+
+# movslq
 I64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1))
+
+# movzbl
+I32.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
+I64.enc(base.uextend.i32.i8, *r.urm.rex(0x0f, 0xb6))
+I64.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
+
+# movzwl
+I32.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
+I64.enc(base.uextend.i32.i16, *r.urm.rex(0x0f, 0xb7))
+I64.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
+
+# movzbq, encoded as movzbl because it's equivalent and shorter
+I64.enc(base.uextend.i64.i8, *r.urm.rex(0x0f, 0xb6))
+I64.enc(base.uextend.i64.i8, *r.urm(0x0f, 0xb6))
+
+# movzwq, encoded as movzwl because it's equivalent and shorter
+I64.enc(base.uextend.i64.i16, *r.urm.rex(0x0f, 0xb7))
+I64.enc(base.uextend.i64.i16, *r.urm(0x0f, 0xb7))
+
 # A 32-bit register copy clears the high 32 bits.
 I64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
 I64.enc(base.uextend.i64.i32, *r.umr(0x89))
--- a/lib/cretonne/meta/isa/intel/instructions.py
+++ b/lib/cretonne/meta/isa/intel/instructions.py
@@ -47,6 +47,28 @@ sdivmodx = Instruction(
        """,
        ins=(nlo, nhi, d), outs=(q, r), can_trap=True)

+argL = Operand('argL', iWord)
+argR = Operand('argR', iWord)
+resLo = Operand('resLo', iWord)
+resHi = Operand('resHi', iWord)
+
+umulx = Instruction(
+        'x86_umulx', r"""
+        Unsigned integer multiplication, producing a double-length result.
+
+        Polymorphic over all scalar integer types, but does not support vector
+        types.
+        """,
+        ins=(argL, argR), outs=(resLo, resHi))
+
+smulx = Instruction(
+        'x86_smulx', r"""
+        Signed integer multiplication, producing a double-length result.
+
+        Polymorphic over all scalar integer types, but does not support vector
+        types.
+        """,
+        ins=(argL, argR), outs=(resLo, resHi))

 Float = TypeVar(
        'Float', 'A scalar or vector floating point number',
--- a/lib/cretonne/meta/isa/intel/legalize.py
+++ b/lib/cretonne/meta/isa/intel/legalize.py
@@ -37,6 +37,23 @@ intel_expand.custom_legalize(insts.srem, 'expand_sdivrem')
 intel_expand.custom_legalize(insts.udiv, 'expand_udivrem')
 intel_expand.custom_legalize(insts.urem, 'expand_udivrem')

+#
+# Double length (widening) multiplication
+#
+resLo = Var('resLo')
+resHi = Var('resHi')
+intel_expand.legalize(
+        resHi << insts.umulhi(x, y),
+        Rtl(
+            (resLo, resHi) << x86.umulx(x, y)
+        ))
+
+intel_expand.legalize(
+        resHi << insts.smulhi(x, y),
+        Rtl(
+            (resLo, resHi) << x86.smulx(x, y)
+        ))
+
 # Floating point condition codes.
 #
 # The 8 condition codes in `supported_floatccs` are directly supported by a
--- a/lib/cretonne/meta/isa/intel/recipes.py
+++ b/lib/cretonne/meta/isa/intel/recipes.py
@@ -453,6 +453,15 @@ div = TailRecipe(
        modrm_r_bits(in_reg2, bits, sink);
        ''')

+# XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo)
+mulx = TailRecipe(
+        'mulx', Binary, size=1,
+        ins=(GPR.rax, GPR), outs=(GPR.rax, GPR.rdx),
+        emit='''
+        PUT_OP(bits, rex1(in_reg1), sink);
+        modrm_r_bits(in_reg1, bits, sink);
+        ''')
+
 # XX /n ib with 8-bit immediate sign-extended.
 rib = TailRecipe(
        'rib', BinaryImm, size=2, ins=GPR, outs=0,
@@ -675,7 +684,7 @@ st_abcd = TailRecipe(

 # XX /r register-indirect store of FPR with no offset.
 fst = TailRecipe(
-        'fst', Store, size=1, ins=(FPR, GPR), outs=(),
+        'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(),
        instp=IsEqual(Store.offset, 0),
        clobbers_flags=False,
        emit='''
--- a/lib/cretonne/meta/isa/intel/settings.py
+++ b/lib/cretonne/meta/isa/intel/settings.py
@@ -11,9 +11,6 @@ ISA.settings = SettingGroup('intel', parent=shared.group)

 # The has_* settings here correspond to CPUID bits.

-# CPUID.01H:EDX
-has_sse2 = BoolSetting("SSE2: CPUID.01H:EDX.SSE2[bit 26]")
-
 # CPUID.01H:ECX
 has_sse3 = BoolSetting("SSE3: CPUID.01H:ECX.SSE3[bit 0]")
 has_ssse3 = BoolSetting("SSSE3: CPUID.01H:ECX.SSSE3[bit 9]")
@@ -40,9 +37,9 @@ use_lzcnt = And(has_lzcnt)

 # Presets corresponding to Intel CPUs.

-baseline = Preset(has_sse2)
+baseline = Preset()
 nehalem = Preset(
-        has_sse2, has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt)
+        has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt)
 haswell = Preset(nehalem, has_bmi1, has_lzcnt)

 ISA.settings.close(globals())
--- a/lib/cretonne/meta/isa/riscv/init.py
+++ b/lib/cretonne/meta/isa/riscv/init.py
@@ -2,7 +2,7 @@
 RISC-V Target
 -------------

-`RISC-V <http://riscv.org/>`_ is an open instruction set architecture
+`RISC-V <https://riscv.org/>`_ is an open instruction set architecture
 originally developed at UC Berkeley. It is a RISC-style ISA with either a
 32-bit (RV32I) or 64-bit (RV64I) base instruction set and a number of optional
 extensions:
--- a/lib/cretonne/src/abi.rs
+++ b/lib/cretonne/src/abi.rs
@@ -12,7 +12,7 @@ use std::vec::Vec;
 ///
 /// An argument may go through a sequence of legalization steps before it reaches the final
 /// `Assign` action.
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Debug)]
 pub enum ArgAction {
    /// Assign the argument to the given location.
    Assign(ArgumentLoc),
@@ -151,7 +151,7 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
    match have_bits.cmp(&arg_bits) {
        // We have fewer bits than the ABI argument.
        Ordering::Less => {
-            assert!(
+            debug_assert!(
                have.is_int() && arg.value_type.is_int(),
                "Can only extend integer values"
            );
@@ -164,8 +164,8 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
        // We have the same number of bits as the argument.
        Ordering::Equal => {
            // This must be an integer vector that is split and then extended.
-            assert!(arg.value_type.is_int());
-            assert!(have.is_vector());
+            debug_assert!(arg.value_type.is_int());
+            debug_assert!(have.is_vector());
            ValueConversion::VectorSplit
        }
        // We have more bits than the argument.
--- a/lib/cretonne/src/bforest/node.rs
+++ b/lib/cretonne/src/bforest/node.rs
@@ -54,8 +54,8 @@ impl<F: Forest> Clone for NodeData<F> {
 impl<F: Forest> NodeData<F> {
    /// Is this a free/unused node?
    pub fn is_free(&self) -> bool {
-        match self {
-            &NodeData::Free { .. } => true,
+        match *self {
+            NodeData::Free { .. } => true,
            _ => false,
        }
    }
@@ -65,10 +65,10 @@ impl<F: Forest> NodeData<F> {
    /// This is the number of outgoing edges in an inner node, or the number of key-value pairs in
    /// a leaf node.
    pub fn entries(&self) -> usize {
-        match self {
-            &NodeData::Inner { size, .. } => usize::from(size) + 1,
-            &NodeData::Leaf { size, .. } => usize::from(size),
-            &NodeData::Free { .. } => panic!("freed node"),
+        match *self {
+            NodeData::Inner { size, .. } => usize::from(size) + 1,
+            NodeData::Leaf { size, .. } => usize::from(size),
+            NodeData::Free { .. } => panic!("freed node"),
        }
    }

@@ -96,8 +96,8 @@ impl<F: Forest> NodeData<F> {

    /// Unwrap an inner node into two slices (keys, trees).
    pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) {
-        match self {
-            &NodeData::Inner {
+        match *self {
+            NodeData::Inner {
                size,
                ref keys,
                ref tree,
@@ -113,8 +113,8 @@ impl<F: Forest> NodeData<F> {

    /// Unwrap a leaf node into two slices (keys, values) of the same length.
    pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) {
-        match self {
-            &NodeData::Leaf {
+        match *self {
+            NodeData::Leaf {
                size,
                ref keys,
                ref vals,
@@ -132,8 +132,8 @@ impl<F: Forest> NodeData<F> {

    /// Unwrap a mutable leaf node into two slices (keys, values) of the same length.
    pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) {
-        match self {
-            &mut NodeData::Leaf {
+        match *self {
+            NodeData::Leaf {
                size,
                ref mut keys,
                ref mut vals,
@@ -152,8 +152,8 @@ impl<F: Forest> NodeData<F> {
    /// Get the critical key for a leaf node.
    /// This is simply the first key.
    pub fn leaf_crit_key(&self) -> F::Key {
-        match self {
-            &NodeData::Leaf { size, ref keys, .. } => {
+        match *self {
+            NodeData::Leaf { size, ref keys, .. } => {
                debug_assert!(size > 0, "Empty leaf node");
                keys.borrow()[0]
            }
@@ -165,8 +165,8 @@ impl<F: Forest> NodeData<F> {
    /// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`.
    /// If the node is full, this leaves the node unchanged and returns false.
    pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool {
-        match self {
-            &mut NodeData::Inner {
+        match *self {
+            NodeData::Inner {
                ref mut size,
                ref mut keys,
                ref mut tree,
@@ -191,8 +191,8 @@ impl<F: Forest> NodeData<F> {
    /// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node
    /// is full.
    pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool {
-        match self {
-            &mut NodeData::Leaf {
+        match *self {
+            NodeData::Leaf {
                ref mut size,
                ref mut keys,
                ref mut vals,
@@ -222,8 +222,8 @@ impl<F: Forest> NodeData<F> {
    /// The `insert_index` parameter is the position where an insertion was tried and failed. The
    /// node will be split in half with a bias towards an even split after the insertion is retried.
    pub fn split(&mut self, insert_index: usize) -> SplitOff<F> {
-        match self {
-            &mut NodeData::Inner {
+        match *self {
+            NodeData::Inner {
                ref mut size,
                ref keys,
                ref tree,
@@ -262,7 +262,7 @@ impl<F: Forest> NodeData<F> {
                    },
                }
            }
-            &mut NodeData::Leaf {
+            NodeData::Leaf {
                ref mut size,
                ref keys,
                ref vals,
@@ -307,8 +307,8 @@ impl<F: Forest> NodeData<F> {
    ///
    /// Return an indication of the node's health (i.e. below half capacity).
    pub fn inner_remove(&mut self, index: usize) -> Removed {
-        match self {
-            &mut NodeData::Inner {
+        match *self {
+            NodeData::Inner {
                ref mut size,
                ref mut keys,
                ref mut tree,
@@ -332,8 +332,8 @@ impl<F: Forest> NodeData<F> {
    ///
    /// Return an indication of the node's health (i.e. below half capacity).
    pub fn leaf_remove(&mut self, index: usize) -> Removed {
-        match self {
-            &mut NodeData::Leaf {
+        match *self {
+            NodeData::Leaf {
                ref mut size,
                ref mut keys,
                ref mut vals,
@@ -553,15 +553,15 @@ where
    F::Value: ValDisp,
 {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            &NodeData::Inner { size, keys, tree } => {
+        match *self {
+            NodeData::Inner { size, keys, tree } => {
                write!(f, "[ {}", tree[0])?;
                for i in 0..usize::from(size) {
                    write!(f, " {} {}", keys[i], tree[i + 1])?;
                }
                write!(f, " ]")
            }
-            &NodeData::Leaf { size, keys, vals } => {
+            NodeData::Leaf { size, keys, vals } => {
                let keys = keys.borrow();
                let vals = vals.borrow();
                write!(f, "[")?;
@@ -571,8 +571,8 @@ where
                }
                write!(f, " ]")
            }
-            &NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n),
-            &NodeData::Free { next: None } => write!(f, "[ free ]"),
+            NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n),
+            NodeData::Free { next: None } => write!(f, "[ free ]"),
        }
    }
 }
--- a/lib/cretonne/src/bforest/path.rs
+++ b/lib/cretonne/src/bforest/path.rs
@@ -316,7 +316,8 @@ impl<F: Forest> Path<F> {
            // Now that we have a not-full node, it must be possible to insert.
            match ins_node {
                None => {
-                    assert!(pool[node].try_leaf_insert(entry, key, value));
+                    let inserted = pool[node].try_leaf_insert(entry, key, value);
+                    debug_assert!(inserted);
                    // If we inserted at the front of the new rhs_node leaf, we need to propagate
                    // the inserted key as the critical key instead of the previous front key.
                    if entry == 0 && node == rhs_node {
@@ -324,7 +325,8 @@ impl<F: Forest> Path<F> {
                    }
                }
                Some(n) => {
-                    assert!(pool[node].try_inner_insert(entry, key, n));
+                    let inserted = pool[node].try_inner_insert(entry, key, n);
+                    debug_assert!(inserted);
                    // The lower level was moved to the new RHS node, so make sure that is
                    // reflected here.
                    if n == self.node[level + 1] {
--- a/lib/cretonne/src/binemit/mod.rs
+++ b/lib/cretonne/src/binemit/mod.rs
@@ -110,7 +110,7 @@ where
    let mut divert = RegDiversions::new();
    for ebb in func.layout.ebbs() {
        divert.clear();
-        assert_eq!(func.offsets[ebb], sink.offset());
+        debug_assert_eq!(func.offsets[ebb], sink.offset());
        for inst in func.layout.ebb_insts(ebb) {
            emit_inst(func, inst, &mut divert, sink);
        }
--- a/lib/cretonne/src/binemit/relaxation.rs
+++ b/lib/cretonne/src/binemit/relaxation.rs
@@ -60,7 +60,7 @@ pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> Result<CodeOffset
        while let Some(ebb) = cur.next_ebb() {
            // Record the offset for `ebb` and make sure we iterate until offsets are stable.
            if cur.func.offsets[ebb] != offset {
-                assert!(
+                debug_assert!(
                    cur.func.offsets[ebb] < offset,
                    "Code shrinking during relaxation"
                );
@@ -111,7 +111,7 @@ fn fallthroughs(func: &mut Function) {
                Opcode::Fallthrough => {
                    // Somebody used a fall-through instruction before the branch relaxation pass.
                    // Make sure it is correct, i.e. the destination is the layout successor.
-                    assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
+                    debug_assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
                }
                Opcode::Jump => {
                    // If this is a jump to the successor EBB, change it to a fall-through.
@@ -152,13 +152,23 @@ fn relax_branch(
    if let Some(enc) = isa.legal_encodings(dfg, &dfg[inst], ctrl_type).find(
        |&enc| {
            let range = encinfo.branch_range(enc).expect("Branch with no range");
-            let in_range = range.contains(offset, dest_offset);
-            dbg!(
-                "  trying [{}]: {}",
-                encinfo.display(enc),
-                if in_range { "OK" } else { "out of range" }
-            );
-            in_range
+            if !range.contains(offset, dest_offset) {
+                dbg!("  trying [{}]: out of range", encinfo.display(enc));
+                false
+            } else if encinfo.operand_constraints(enc) !=
+                       encinfo.operand_constraints(cur.func.encodings[inst])
+            {
+                // Conservatively give up if the encoding has different constraints
+                // than the original, so that we don't risk picking a new encoding
+                // which the existing operands don't satisfy. We can't check for
+                // validity directly because we don't have a RegDiversions active so
+                // we don't know which registers are actually in use.
+                dbg!("  trying [{}]: constraints differ", encinfo.display(enc));
+                false
+            } else {
+                dbg!("  trying [{}]: OK", encinfo.display(enc));
+                true
+            }
        },
    )
    {
--- a/lib/cretonne/src/bitset.rs
+++ b/lib/cretonne/src/bitset.rs
@@ -36,8 +36,8 @@ where

    /// Check if this BitSet contains the number num
    pub fn contains(&self, num: u8) -> bool {
-        assert!((num as usize) < Self::bits());
-        assert!((num as usize) < Self::max_bits());
+        debug_assert!((num as usize) < Self::bits());
+        debug_assert!((num as usize) < Self::max_bits());
        self.0.into() & (1 << num) != 0
    }

@@ -62,8 +62,8 @@ where

    /// Construct a BitSet with the half-open range [lo,hi) filled in
    pub fn from_range(lo: u8, hi: u8) -> Self {
-        assert!(lo <= hi);
-        assert!((hi as usize) <= Self::bits());
+        debug_assert!(lo <= hi);
+        debug_assert!((hi as usize) <= Self::bits());
        let one: T = T::from(1);
        // I can't just do (one << hi) - one here as the shift may overflow
        let hi_rng = if hi >= 1 {
--- a/lib/cretonne/src/context.rs
+++ b/lib/cretonne/src/context.rs
@@ -18,11 +18,12 @@ use isa::TargetIsa;
 use legalize_function;
 use regalloc;
 use result::{CtonError, CtonResult};
-use settings::FlagsOrIsa;
+use settings::{FlagsOrIsa, OptLevel};
 use unreachable_code::eliminate_unreachable_code;
 use verifier;
 use simple_gvn::do_simple_gvn;
 use licm::do_licm;
+use preopt::do_preopt;
 use timing;

 /// Persistent data structures and compilation pipeline.
@@ -87,15 +88,14 @@ impl Context {
        self.verify_if(isa)?;

        self.compute_cfg();
+        self.preopt(isa)?;
        self.legalize(isa)?;
-        /* TODO: Enable additional optimization passes.
        if isa.flags().opt_level() == OptLevel::Best {
            self.compute_domtree();
            self.compute_loop_analysis();
            self.licm(isa)?;
            self.simple_gvn(isa)?;
        }
-        */
        self.compute_domtree();
        self.eliminate_unreachable_code(isa)?;
        self.regalloc(isa)?;
@@ -131,6 +131,27 @@ impl Context {
        }
    }

+    /// Run the locations verifier on the function.
+    ///
+    /// Forwards `isa` and this context's function to `verifier::verify_locations`, passing
+    /// `None` as the third argument, and returns the verifier's result unchanged.
+    pub fn verify_locations(&self, isa: &TargetIsa) -> verifier::Result {
+        verifier::verify_locations(isa, &self.func, None)
+    }
+
+    /// Run the locations verifier only if the `enable_verifier` setting is true.
+    ///
+    /// Returns `Ok(())` without verifying when the setting is off. Otherwise any verifier
+    /// error is converted with `Into` so that it fits `CtonResult`.
+    pub fn verify_locations_if(&self, isa: &TargetIsa) -> CtonResult {
+        if isa.flags().enable_verifier() {
+            self.verify_locations(isa).map_err(Into::into)
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Perform pre-legalization rewrites on the function.
+    pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
+        do_preopt(&mut self.func);
+        // The rewrites mutated the function, so re-verify it when the verifier is enabled.
+        self.verify_if(isa)
+    }
+
    /// Run the legalizer for `isa` on the function.
    pub fn legalize(&mut self, isa: &TargetIsa) -> CtonResult {
        // Legalization invalidates the domtree and loop_analysis by mutating the CFG.
@@ -205,13 +226,16 @@ impl Context {
    /// Insert prologue and epilogues after computing the stack frame layout.
    pub fn prologue_epilogue(&mut self, isa: &TargetIsa) -> CtonResult {
        isa.prologue_epilogue(&mut self.func)?;
-        self.verify_if(isa)
+        self.verify_if(isa)?;
+        self.verify_locations_if(isa)?;
+        Ok(())
    }

    /// Run the branch relaxation pass and return the final code size.
    pub fn relax_branches(&mut self, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
        let code_size = relax_branches(&mut self.func, isa)?;
        self.verify_if(isa)?;
+        self.verify_locations_if(isa)?;

        Ok(code_size)
    }
--- a/lib/cretonne/src/cursor.rs
+++ b/lib/cretonne/src/cursor.rs
@@ -256,7 +256,7 @@ pub trait Cursor {
    /// Go to a specific instruction which must be inserted in the layout.
    /// New instructions will be inserted before `inst`.
    fn goto_inst(&mut self, inst: ir::Inst) {
-        assert!(self.layout().inst_ebb(inst).is_some());
+        debug_assert!(self.layout().inst_ebb(inst).is_some());
        self.set_position(CursorPosition::At(inst));
    }

@@ -287,14 +287,14 @@ pub trait Cursor {
    /// At this position, instructions cannot be inserted, but `next_inst()` will move to the first
    /// instruction in `ebb`.
    fn goto_top(&mut self, ebb: ir::Ebb) {
-        assert!(self.layout().is_ebb_inserted(ebb));
+        debug_assert!(self.layout().is_ebb_inserted(ebb));
        self.set_position(CursorPosition::Before(ebb));
    }

    /// Go to the bottom of `ebb` which must be inserted into the layout.
    /// At this position, inserted instructions will be appended to `ebb`.
    fn goto_bottom(&mut self, ebb: ir::Ebb) {
-        assert!(self.layout().is_ebb_inserted(ebb));
+        debug_assert!(self.layout().is_ebb_inserted(ebb));
        self.set_position(CursorPosition::After(ebb));
    }

--- a/lib/cretonne/src/divconst_magic_numbers.rs
+++ b/lib/cretonne/src/divconst_magic_numbers.rs
@@ -0,0 +1,542 @@
+//! Compute "magic numbers" for division-by-constants transformations.
+
+#![allow(non_snake_case)]
+
+//----------------------------------------------------------------------
+//
+// Math helpers for division by (non-power-of-2) constants. This is based
+// on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
+// are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
+// makes little difference, but the signed-vs-unsigned aspect has a large
+// effect. Therefore everything is presented in the order U32 U64 S32 S64
+// so as to emphasise the similarity of the U32 and U64 cases and the S32
+// and S64 cases.
+
+// Structures to hold the "magic numbers" computed.
+
+/// Magic numbers for an unsigned 32-bit division by a constant, as returned by `magicU32`.
+// NOTE(review): `doAdd` presumably tells the consumer to emit an extra add fix-up -- confirm
+// against the code that uses these values.
+#[derive(PartialEq, Debug)]
+pub struct MU32 {
+    pub mulBy: u32,
+    pub doAdd: bool,
+    pub shiftBy: i32,
+}
+
+/// Magic numbers for an unsigned 64-bit division by a constant, as returned by `magicU64`.
+#[derive(PartialEq, Debug)]
+pub struct MU64 {
+    pub mulBy: u64,
+    pub doAdd: bool,
+    pub shiftBy: i32,
+}
+
+/// Magic numbers for a signed 32-bit division by a constant, as returned by `magicS32`.
+#[derive(PartialEq, Debug)]
+pub struct MS32 {
+    pub mulBy: i32,
+    pub shiftBy: i32,
+}
+
+/// Magic numbers for a signed 64-bit division by a constant, as returned by `magicS64`.
+#[derive(PartialEq, Debug)]
+pub struct MS64 {
+    pub mulBy: i64,
+    pub shiftBy: i32,
+}
+
+// The actual "magic number" generators follow.
+
+/// Compute the magic numbers for an unsigned 32-bit division by the constant `d`.
+///
+/// `d` must be neither 0 nor 1; this is only checked in debug builds. The result holds the
+/// multiplier (`mulBy`), the `doAdd` flag and the shift amount (`shiftBy`).
+pub fn magicU32(d: u32) -> MU32 {
+    debug_assert_ne!(d, 0);
+    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
+
+    let mut needs_add = false;
+    let mut p: i32 = 31;
+    let nc: u32 = 0xFFFFFFFFu32 - d.wrapping_neg() % d;
+    let mut q1: u32 = 0x80000000u32 / nc;
+    let mut r1: u32 = 0x80000000u32 - q1 * nc;
+    let mut q2: u32 = 0x7FFFFFFFu32 / d;
+    let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
+    loop {
+        p += 1;
+
+        // Double (q1, r1), folding r1 back below nc when it would reach it.
+        if r1 >= nc - r1 {
+            q1 = q1.wrapping_mul(2).wrapping_add(1);
+            r1 = r1.wrapping_mul(2).wrapping_sub(nc);
+        } else {
+            q1 *= 2;
+            r1 *= 2;
+        }
+
+        // Same for (q2, r2) relative to d; remember whether q2 outgrows 32 bits.
+        if r2 + 1 >= d - r2 {
+            needs_add |= q2 >= 0x7FFFFFFFu32;
+            q2 = 2 * q2 + 1;
+            r2 = r2.wrapping_mul(2).wrapping_add(1).wrapping_sub(d);
+        } else {
+            needs_add |= q2 >= 0x80000000u32;
+            q2 = q2.wrapping_mul(2);
+            r2 = 2 * r2 + 1;
+        }
+
+        let delta: u32 = d - 1 - r2;
+        let unfinished = q1 < delta || (q1 == delta && r1 == 0);
+        if p >= 64 || !unfinished {
+            break;
+        }
+    }
+
+    MU32 {
+        mulBy: q2 + 1,
+        doAdd: needs_add,
+        shiftBy: p - 32,
+    }
+}
+
+/// Compute the magic numbers for an unsigned 64-bit division by the constant `d`.
+///
+/// `d` must be neither 0 nor 1; this is only checked in debug builds. The result holds the
+/// multiplier (`mulBy`), the `doAdd` flag and the shift amount (`shiftBy`).
+pub fn magicU64(d: u64) -> MU64 {
+    debug_assert_ne!(d, 0);
+    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
+
+    let mut needs_add = false;
+    let mut p: i32 = 63;
+    let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - d.wrapping_neg() % d;
+    let mut q1: u64 = 0x8000000000000000u64 / nc;
+    let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
+    let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
+    let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
+    loop {
+        p += 1;
+
+        // Double (q1, r1), folding r1 back below nc when it would reach it.
+        if r1 >= nc - r1 {
+            q1 = q1.wrapping_mul(2).wrapping_add(1);
+            r1 = r1.wrapping_mul(2).wrapping_sub(nc);
+        } else {
+            q1 *= 2;
+            r1 *= 2;
+        }
+
+        // Same for (q2, r2) relative to d; remember whether q2 outgrows 64 bits.
+        if r2 + 1 >= d - r2 {
+            needs_add |= q2 >= 0x7FFFFFFFFFFFFFFFu64;
+            q2 = 2 * q2 + 1;
+            r2 = r2.wrapping_mul(2).wrapping_add(1).wrapping_sub(d);
+        } else {
+            needs_add |= q2 >= 0x8000000000000000u64;
+            q2 = q2.wrapping_mul(2);
+            r2 = 2 * r2 + 1;
+        }
+
+        let delta: u64 = d - 1 - r2;
+        let unfinished = q1 < delta || (q1 == delta && r1 == 0);
+        if p >= 128 || !unfinished {
+            break;
+        }
+    }
+
+    MU64 {
+        mulBy: q2 + 1,
+        doAdd: needs_add,
+        shiftBy: p - 64,
+    }
+}
+
+/// Compute the magic numbers for a signed 32-bit division by the constant `d`.
+///
+/// `d` must not be -1, 0 or 1; this is only checked in debug builds. The multiplier is
+/// negated (with wraparound) when `d` is negative.
+pub fn magicS32(d: i32) -> MS32 {
+    debug_assert_ne!(d, -1);
+    debug_assert_ne!(d, 0);
+    debug_assert_ne!(d, 1);
+    let two31: u32 = 0x80000000u32;
+    let mut p: i32 = 31;
+    // |d| as an unsigned value; i32::MIN maps to 2^31.
+    let ad: u32 = d.wrapping_abs() as u32;
+    let t: u32 = two31 + ((d as u32) >> 31);
+    let anc: u32 = (t - 1).wrapping_sub(t % ad);
+    let mut q1: u32 = two31 / anc;
+    let mut r1: u32 = two31 - q1 * anc;
+    let mut q2: u32 = two31 / ad;
+    let mut r2: u32 = two31 - q2 * ad;
+    loop {
+        p += 1;
+
+        // Double (q1, r1) and reduce r1 modulo anc.
+        q1 *= 2;
+        r1 *= 2;
+        if r1 >= anc {
+            q1 += 1;
+            r1 -= anc;
+        }
+
+        // Double (q2, r2) and reduce r2 modulo ad.
+        q2 *= 2;
+        r2 *= 2;
+        if r2 >= ad {
+            q2 += 1;
+            r2 -= ad;
+        }
+
+        let delta: u32 = ad - r2;
+        let unfinished = q1 < delta || (q1 == delta && r1 == 0);
+        if !unfinished {
+            break;
+        }
+    }
+
+    let magnitude: u32 = q2 + 1;
+    let multiplier: u32 = if d < 0 {
+        magnitude.wrapping_neg()
+    } else {
+        magnitude
+    };
+    MS32 {
+        mulBy: multiplier as i32,
+        shiftBy: p - 32,
+    }
+}
+
+/// Compute the magic numbers for a signed 64-bit division by the constant `d`.
+///
+/// `d` must not be -1, 0 or 1; this is only checked in debug builds. The multiplier is
+/// negated (with wraparound) when `d` is negative.
+pub fn magicS64(d: i64) -> MS64 {
+    debug_assert_ne!(d, -1);
+    debug_assert_ne!(d, 0);
+    debug_assert_ne!(d, 1);
+    let two63: u64 = 0x8000000000000000u64;
+    let mut p: i32 = 63;
+    // |d| as an unsigned value; i64::MIN maps to 2^63.
+    let ad: u64 = d.wrapping_abs() as u64;
+    let t: u64 = two63 + ((d as u64) >> 63);
+    let anc: u64 = (t - 1).wrapping_sub(t % ad);
+    let mut q1: u64 = two63 / anc;
+    let mut r1: u64 = two63 - q1 * anc;
+    let mut q2: u64 = two63 / ad;
+    let mut r2: u64 = two63 - q2 * ad;
+    loop {
+        p += 1;
+
+        // Double (q1, r1) and reduce r1 modulo anc.
+        q1 *= 2;
+        r1 *= 2;
+        if r1 >= anc {
+            q1 += 1;
+            r1 -= anc;
+        }
+
+        // Double (q2, r2) and reduce r2 modulo ad.
+        q2 *= 2;
+        r2 *= 2;
+        if r2 >= ad {
+            q2 += 1;
+            r2 -= ad;
+        }
+
+        let delta: u64 = ad - r2;
+        let unfinished = q1 < delta || (q1 == delta && r1 == 0);
+        if !unfinished {
+            break;
+        }
+    }
+
+    let magnitude: u64 = q2 + 1;
+    let multiplier: u64 = if d < 0 {
+        magnitude.wrapping_neg()
+    } else {
+        magnitude
+    };
+    MS64 {
+        mulBy: multiplier as i64,
+        shiftBy: p - 64,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{magicU32, magicU64, magicS32, magicS64};
+    use super::{MU32, MU64, MS32, MS64};
+
+    // Shorthand constructors so the expected values in the tests below fit on one line.
+    fn mkMU32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 {
+        MU32 {
+            mulBy: mul_by,
+            doAdd: do_add,
+            shiftBy: shift_by,
+        }
+    }
+
+    fn mkMU64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 {
+        MU64 {
+            mulBy: mul_by,
+            doAdd: do_add,
+            shiftBy: shift_by,
+        }
+    }
+
+    fn mkMS32(mul_by: i32, shift_by: i32) -> MS32 {
+        MS32 {
+            mulBy: mul_by,
+            shiftBy: shift_by,
+        }
+    }
+
+    fn mkMS64(mul_by: i64, shift_by: i32) -> MS64 {
+        MS64 {
+            mulBy: mul_by,
+            shiftBy: shift_by,
+        }
+    }
+
+    #[test]
+    fn test_magicU32() {
+        // Spot-check `magicU32` against fixed expected results for small divisors,
+        // powers of two and divisors close to `u32::MAX`.
+        assert_eq!(magicU32(2u32), mkMU32(0x80000000u32, false, 0));
+        assert_eq!(magicU32(3u32), mkMU32(0xaaaaaaabu32, false, 1));
+        assert_eq!(magicU32(4u32), mkMU32(0x40000000u32, false, 0));
+        assert_eq!(magicU32(5u32), mkMU32(0xcccccccdu32, false, 2));
+        assert_eq!(magicU32(6u32), mkMU32(0xaaaaaaabu32, false, 2));
+        assert_eq!(magicU32(7u32), mkMU32(0x24924925u32, true, 3));
+        assert_eq!(magicU32(9u32), mkMU32(0x38e38e39u32, false, 1));
+        assert_eq!(magicU32(10u32), mkMU32(0xcccccccdu32, false, 3));
+        assert_eq!(magicU32(11u32), mkMU32(0xba2e8ba3u32, false, 3));
+        assert_eq!(magicU32(12u32), mkMU32(0xaaaaaaabu32, false, 3));
+        assert_eq!(magicU32(25u32), mkMU32(0x51eb851fu32, false, 3));
+        assert_eq!(magicU32(125u32), mkMU32(0x10624dd3u32, false, 3));
+        assert_eq!(magicU32(625u32), mkMU32(0xd1b71759u32, false, 9));
+        assert_eq!(magicU32(1337u32), mkMU32(0x88233b2bu32, true, 11));
+        assert_eq!(magicU32(65535u32), mkMU32(0x80008001u32, false, 15));
+        assert_eq!(magicU32(65536u32), mkMU32(0x00010000u32, false, 0));
+        assert_eq!(magicU32(65537u32), mkMU32(0xffff0001u32, false, 16));
+        assert_eq!(magicU32(31415927u32), mkMU32(0x445b4553u32, false, 23));
+        assert_eq!(magicU32(0xdeadbeefu32), mkMU32(0x93275ab3u32, false, 31));
+        assert_eq!(magicU32(0xfffffffdu32), mkMU32(0x40000001u32, false, 30));
+        assert_eq!(magicU32(0xfffffffeu32), mkMU32(0x00000003u32, true, 32));
+        assert_eq!(magicU32(0xffffffffu32), mkMU32(0x80000001u32, false, 31));
+    }
+    #[test]
+    fn test_magicU64() {
+        // Spot-check `magicU64` against fixed expected results for small divisors,
+        // divisors around 2^32 and divisors close to `u64::MAX`.
+        assert_eq!(magicU64(2u64), mkMU64(0x8000000000000000u64, false, 0));
+        assert_eq!(magicU64(3u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 1));
+        assert_eq!(magicU64(4u64), mkMU64(0x4000000000000000u64, false, 0));
+        assert_eq!(magicU64(5u64), mkMU64(0xcccccccccccccccdu64, false, 2));
+        assert_eq!(magicU64(6u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 2));
+        assert_eq!(magicU64(7u64), mkMU64(0x2492492492492493u64, true, 3));
+        assert_eq!(magicU64(9u64), mkMU64(0xe38e38e38e38e38fu64, false, 3));
+        assert_eq!(magicU64(10u64), mkMU64(0xcccccccccccccccdu64, false, 3));
+        assert_eq!(magicU64(11u64), mkMU64(0x2e8ba2e8ba2e8ba3u64, false, 1));
+        assert_eq!(magicU64(12u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 3));
+        assert_eq!(magicU64(25u64), mkMU64(0x47ae147ae147ae15u64, true, 5));
+        assert_eq!(magicU64(125u64), mkMU64(0x0624dd2f1a9fbe77u64, true, 7));
+        assert_eq!(magicU64(625u64), mkMU64(0x346dc5d63886594bu64, false, 7));
+        assert_eq!(magicU64(1337u64), mkMU64(0xc4119d952866a139u64, false, 10));
+        assert_eq!(
+            magicU64(31415927u64),
+            mkMU64(0x116d154b9c3d2f85u64, true, 25)
+        );
+        assert_eq!(
+            magicU64(0x00000000deadbeefu64),
+            mkMU64(0x93275ab2dfc9094bu64, false, 31)
+        );
+        assert_eq!(
+            magicU64(0x00000000fffffffdu64),
+            mkMU64(0x8000000180000005u64, false, 31)
+        );
+        assert_eq!(
+            magicU64(0x00000000fffffffeu64),
+            mkMU64(0x0000000200000005u64, true, 32)
+        );
+        assert_eq!(
+            magicU64(0x00000000ffffffffu64),
+            mkMU64(0x8000000080000001u64, false, 31)
+        );
+        assert_eq!(
+            magicU64(0x0000000100000000u64),
+            mkMU64(0x0000000100000000u64, false, 0)
+        );
+        assert_eq!(
+            magicU64(0x0000000100000001u64),
+            mkMU64(0xffffffff00000001u64, false, 32)
+        );
+        assert_eq!(
+            magicU64(0x0ddc0ffeebadf00du64),
+            mkMU64(0x2788e9d394b77da1u64, true, 60)
+        );
+        assert_eq!(
+            magicU64(0xfffffffffffffffdu64),
+            mkMU64(0x4000000000000001u64, false, 62)
+        );
+        assert_eq!(
+            magicU64(0xfffffffffffffffeu64),
+            mkMU64(0x0000000000000003u64, true, 64)
+        );
+        assert_eq!(
+            magicU64(0xffffffffffffffffu64),
+            mkMU64(0x8000000000000001u64, false, 63)
+        );
+    }
+    #[test]
+    fn test_magicS32() {
+        // Spot-check `magicS32` against fixed expected results for negative and positive
+        // divisors, from `i32::MIN` up to `i32::MAX` (excluding -1, 0 and 1).
+        assert_eq!(magicS32(-0x80000000i32), mkMS32(0x7fffffffu32 as i32, 30));
+        assert_eq!(magicS32(-0x7FFFFFFFi32), mkMS32(0xbfffffffu32 as i32, 29));
+        assert_eq!(magicS32(-0x7FFFFFFEi32), mkMS32(0x7ffffffdu32 as i32, 30));
+        assert_eq!(magicS32(-31415927i32), mkMS32(0xbba4baadu32 as i32, 23));
+        assert_eq!(magicS32(-1337i32), mkMS32(0x9df73135u32 as i32, 9));
+        assert_eq!(magicS32(-256i32), mkMS32(0x7fffffffu32 as i32, 7));
+        assert_eq!(magicS32(-5i32), mkMS32(0x99999999u32 as i32, 1));
+        assert_eq!(magicS32(-3i32), mkMS32(0x55555555u32 as i32, 1));
+        assert_eq!(magicS32(-2i32), mkMS32(0x7fffffffu32 as i32, 0));
+        assert_eq!(magicS32(2i32), mkMS32(0x80000001u32 as i32, 0));
+        assert_eq!(magicS32(3i32), mkMS32(0x55555556u32 as i32, 0));
+        assert_eq!(magicS32(4i32), mkMS32(0x80000001u32 as i32, 1));
+        assert_eq!(magicS32(5i32), mkMS32(0x66666667u32 as i32, 1));
+        assert_eq!(magicS32(6i32), mkMS32(0x2aaaaaabu32 as i32, 0));
+        assert_eq!(magicS32(7i32), mkMS32(0x92492493u32 as i32, 2));
+        assert_eq!(magicS32(9i32), mkMS32(0x38e38e39u32 as i32, 1));
+        assert_eq!(magicS32(10i32), mkMS32(0x66666667u32 as i32, 2));
+        assert_eq!(magicS32(11i32), mkMS32(0x2e8ba2e9u32 as i32, 1));
+        assert_eq!(magicS32(12i32), mkMS32(0x2aaaaaabu32 as i32, 1));
+        assert_eq!(magicS32(25i32), mkMS32(0x51eb851fu32 as i32, 3));
+        assert_eq!(magicS32(125i32), mkMS32(0x10624dd3u32 as i32, 3));
+        assert_eq!(magicS32(625i32), mkMS32(0x68db8badu32 as i32, 8));
+        assert_eq!(magicS32(1337i32), mkMS32(0x6208cecbu32 as i32, 9));
+        assert_eq!(magicS32(31415927i32), mkMS32(0x445b4553u32 as i32, 23));
+        assert_eq!(magicS32(0x7ffffffei32), mkMS32(0x80000003u32 as i32, 30));
+        assert_eq!(magicS32(0x7fffffffi32), mkMS32(0x40000001u32 as i32, 29));
+    }
+    #[test]
+    fn test_magicS64() {
+        // Spot-check `magicS64` against fixed expected results for negative and positive
+        // divisors, from `i64::MIN` up to `i64::MAX` (excluding -1, 0 and 1).
+        assert_eq!(
+            magicS64(-0x8000000000000000i64),
+            mkMS64(0x7fffffffffffffffu64 as i64, 62)
+        );
+        assert_eq!(
+            magicS64(-0x7FFFFFFFFFFFFFFFi64),
+            mkMS64(0xbfffffffffffffffu64 as i64, 61)
+        );
+        assert_eq!(
+            magicS64(-0x7FFFFFFFFFFFFFFEi64),
+            mkMS64(0x7ffffffffffffffdu64 as i64, 62)
+        );
+        assert_eq!(
+            magicS64(-0x0ddC0ffeeBadF00di64),
+            mkMS64(0x6c3b8b1635a4412fu64 as i64, 59)
+        );
+        assert_eq!(
+            magicS64(-0x100000001i64),
+            mkMS64(0x800000007fffffffu64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(-0x100000000i64),
+            mkMS64(0x7fffffffffffffffu64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(-0xFFFFFFFFi64),
+            mkMS64(0x7fffffff7fffffffu64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(-0xFFFFFFFEi64),
+            mkMS64(0x7ffffffefffffffdu64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(-0xFFFFFFFDi64),
+            mkMS64(0x7ffffffe7ffffffbu64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(-0xDeadBeefi64),
+            mkMS64(0x6cd8a54d2036f6b5u64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(-31415927i64),
+            mkMS64(0x7749755a31e1683du64 as i64, 24)
+        );
+        assert_eq!(magicS64(-1337i64), mkMS64(0x9df731356bccaf63u64 as i64, 9));
+        assert_eq!(magicS64(-256i64), mkMS64(0x7fffffffffffffffu64 as i64, 7));
+        assert_eq!(magicS64(-5i64), mkMS64(0x9999999999999999u64 as i64, 1));
+        assert_eq!(magicS64(-3i64), mkMS64(0x5555555555555555u64 as i64, 1));
+        assert_eq!(magicS64(-2i64), mkMS64(0x7fffffffffffffffu64 as i64, 0));
+        assert_eq!(magicS64(2i64), mkMS64(0x8000000000000001u64 as i64, 0));
+        assert_eq!(magicS64(3i64), mkMS64(0x5555555555555556u64 as i64, 0));
+        assert_eq!(magicS64(4i64), mkMS64(0x8000000000000001u64 as i64, 1));
+        assert_eq!(magicS64(5i64), mkMS64(0x6666666666666667u64 as i64, 1));
+        assert_eq!(magicS64(6i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
+        assert_eq!(magicS64(7i64), mkMS64(0x4924924924924925u64 as i64, 1));
+        assert_eq!(magicS64(9i64), mkMS64(0x1c71c71c71c71c72u64 as i64, 0));
+        assert_eq!(magicS64(10i64), mkMS64(0x6666666666666667u64 as i64, 2));
+        assert_eq!(magicS64(11i64), mkMS64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
+        assert_eq!(magicS64(12i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
+        assert_eq!(magicS64(25i64), mkMS64(0xa3d70a3d70a3d70bu64 as i64, 4));
+        assert_eq!(magicS64(125i64), mkMS64(0x20c49ba5e353f7cfu64 as i64, 4));
+        assert_eq!(magicS64(625i64), mkMS64(0x346dc5d63886594bu64 as i64, 7));
+        assert_eq!(magicS64(1337i64), mkMS64(0x6208ceca9433509du64 as i64, 9));
+        assert_eq!(
+            magicS64(31415927i64),
+            mkMS64(0x88b68aa5ce1e97c3u64 as i64, 24)
+        );
+        assert_eq!(
+            magicS64(0x00000000deadbeefi64),
+            mkMS64(0x93275ab2dfc9094bu64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(0x00000000fffffffdi64),
+            mkMS64(0x8000000180000005u64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(0x00000000fffffffei64),
+            mkMS64(0x8000000100000003u64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(0x00000000ffffffffi64),
+            mkMS64(0x8000000080000001u64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(0x0000000100000000i64),
+            mkMS64(0x8000000000000001u64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(0x0000000100000001i64),
+            mkMS64(0x7fffffff80000001u64 as i64, 31)
+        );
+        assert_eq!(
+            magicS64(0x0ddc0ffeebadf00di64),
+            mkMS64(0x93c474e9ca5bbed1u64 as i64, 59)
+        );
+        assert_eq!(
+            magicS64(0x7ffffffffffffffdi64),
+            mkMS64(0x2000000000000001u64 as i64, 60)
+        );
+        assert_eq!(
+            magicS64(0x7ffffffffffffffei64),
+            mkMS64(0x8000000000000003u64 as i64, 62)
+        );
+        assert_eq!(
+            magicS64(0x7fffffffffffffffi64),
+            mkMS64(0x4000000000000001u64 as i64, 61)
+        );
+    }
+    #[test]
+    fn test_magic_generators_dont_panic() {
+        // Sweep every generator over a run of divisors at both ends of its argument
+        // range to make sure none of them panics on integer wraparound. The results
+        // themselves only feed a running checksum.
+
+        // Fold one unsigned result into the checksum: xor, then add, then subtract.
+        fn fold_unsigned(acc: u64, mul_by: u64, shift_by: i32, do_add: bool) -> u64 {
+            let mixed = (acc ^ mul_by) + (shift_by as u64);
+            mixed - (if do_add { 123 } else { 456 })
+        }
+
+        // Fold one signed result into the checksum: xor, then add.
+        fn fold_signed(acc: u64, mul_by: u64, shift_by: i32) -> u64 {
+            (acc ^ mul_by) + (shift_by as u64)
+        }
+
+        let mut total: u64 = 0;
+        println!("Testing UP magicU32");
+        for x in 2..(200 * 1000u32) {
+            let m = magicU32(x);
+            total = fold_unsigned(total, m.mulBy as u64, m.shiftBy, m.doAdd);
+        }
+        println!("Testing DOWN magicU32");
+        for x in 0..(200 * 1000u32) {
+            let m = magicU32(0xFFFF_FFFFu32 - x);
+            total = fold_unsigned(total, m.mulBy as u64, m.shiftBy, m.doAdd);
+        }
+
+        println!("Testing UP magicU64");
+        for x in 2..(200 * 1000u64) {
+            let m = magicU64(x);
+            total = fold_unsigned(total, m.mulBy, m.shiftBy, m.doAdd);
+        }
+        println!("Testing DOWN magicU64");
+        for x in 0..(200 * 1000u64) {
+            let m = magicU64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
+            total = fold_unsigned(total, m.mulBy, m.shiftBy, m.doAdd);
+        }
+
+        println!("Testing UP magicS32");
+        for x in 0..(200 * 1000i32) {
+            let m = magicS32(-0x8000_0000i32 + x);
+            total = fold_signed(total, m.mulBy as u64, m.shiftBy);
+        }
+        println!("Testing DOWN magicS32");
+        for x in 0..(200 * 1000i32) {
+            let m = magicS32(0x7FFF_FFFFi32 - x);
+            total = fold_signed(total, m.mulBy as u64, m.shiftBy);
+        }
+
+        println!("Testing UP magicS64");
+        for x in 0..(200 * 1000i64) {
+            let m = magicS64(-0x8000_0000_0000_0000i64 + x);
+            total = fold_signed(total, m.mulBy as u64, m.shiftBy);
+        }
+        println!("Testing DOWN magicS64");
+        for x in 0..(200 * 1000i64) {
+            let m = magicS64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
+            total = fold_signed(total, m.mulBy as u64, m.shiftBy);
+        }
+        // Comparing against the known checksum forces the whole computation to be
+        // used, so rustc cannot optimise it away.
+        assert_eq!(total, 7547519887532559585u64);
+    }
+}
--- a/lib/cretonne/src/dominator_tree.rs
+++ b/lib/cretonne/src/dominator_tree.rs
@@ -197,7 +197,7 @@ impl DominatorTree {
            }
        }

-        assert_eq!(a.0, b.0, "Unreachable block passed to common_dominator?");
+        debug_assert_eq!(a.0, b.0, "Unreachable block passed to common_dominator?");

        // We're in the same EBB. The common dominator is the earlier instruction.
        if layout.cmp(a.1, b.1) == Ordering::Less {
@@ -241,7 +241,7 @@ impl DominatorTree {
    pub fn clear(&mut self) {
        self.nodes.clear();
        self.postorder.clear();
-        assert!(self.stack.is_empty());
+        debug_assert!(self.stack.is_empty());
        self.valid = false;
    }

@@ -340,7 +340,7 @@ impl DominatorTree {
    /// post-order except for the insertion of the new EBB header at the split point.
    fn push_successors(&mut self, func: &Function, ebb: Ebb) {
        for inst in func.layout.ebb_insts(ebb) {
-            match func.dfg[inst].analyze_branch(&func.dfg.value_lists) {
+            match func.dfg.analyze_branch(inst) {
                BranchInfo::SingleDest(succ, _) => {
                    if self.nodes[succ].rpo_number == 0 {
                        self.nodes[succ].rpo_number = SEEN;
@@ -539,7 +539,7 @@ impl DominatorTreePreorder {
    /// Recompute this data structure to match `domtree`.
    pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
        self.nodes.clear();
-        assert_eq!(self.stack.len(), 0);
+        debug_assert_eq!(self.stack.len(), 0);

        // Step 1: Populate the child and sibling links.
        //
@@ -557,7 +557,7 @@ impl DominatorTreePreorder {
        }

        // Step 2. Assign pre-order numbers from a DFS of the dominator tree.
-        assert!(self.stack.len() <= 1);
+        debug_assert!(self.stack.len() <= 1);
        let mut n = 0;
        while let Some(ebb) = self.stack.pop() {
            n += 1;
--- a/lib/cretonne/src/entity/list.rs
+++ b/lib/cretonne/src/entity/list.rs
@@ -220,8 +220,8 @@ impl<T: EntityRef> ListPool<T> {
        to_sclass: SizeClass,
        elems_to_copy: usize,
    ) -> usize {
-        assert!(elems_to_copy <= sclass_size(from_sclass));
-        assert!(elems_to_copy <= sclass_size(to_sclass));
+        debug_assert!(elems_to_copy <= sclass_size(from_sclass));
+        debug_assert!(elems_to_copy <= sclass_size(to_sclass));
        let new_block = self.alloc(to_sclass);

        if elems_to_copy > 0 {
@@ -302,7 +302,7 @@ impl<T: EntityRef> EntityList<T> {
    pub fn clear(&mut self, pool: &mut ListPool<T>) {
        let idx = self.index as usize;
        match pool.len_of(self) {
-            None => assert_eq!(idx, 0, "Invalid pool"),
+            None => debug_assert_eq!(idx, 0, "Invalid pool"),
            Some(len) => pool.free(idx - 1, sclass_for_length(len)),
        }
        // Switch back to the empty list representation which has no storage.
@@ -323,7 +323,7 @@ impl<T: EntityRef> EntityList<T> {
        match pool.len_of(self) {
            None => {
                // This is an empty list. Allocate a block and set length=1.
-                assert_eq!(idx, 0, "Invalid pool");
+                debug_assert_eq!(idx, 0, "Invalid pool");
                let block = pool.alloc(sclass_for_length(1));
                pool.data[block] = T::new(1);
                pool.data[block + 1] = element;
@@ -359,7 +359,7 @@ impl<T: EntityRef> EntityList<T> {
        match pool.len_of(self) {
            None => {
                // This is an empty list. Allocate a block.
-                assert_eq!(idx, 0, "Invalid pool");
+                debug_assert_eq!(idx, 0, "Invalid pool");
                if count == 0 {
                    return &mut [];
                }
@@ -410,7 +410,7 @@ impl<T: EntityRef> EntityList<T> {
            }
            tail[0] = element;
        } else {
-            assert_eq!(index, seq.len());
+            debug_assert_eq!(index, seq.len());
        }
    }

@@ -420,7 +420,7 @@ impl<T: EntityRef> EntityList<T> {
        {
            let seq = self.as_mut_slice(pool);
            len = seq.len();
-            assert!(index < len);
+            debug_assert!(index < len);

            // Copy elements down.
            for i in index..len - 1 {
@@ -450,7 +450,7 @@ impl<T: EntityRef> EntityList<T> {
    /// the list.
    pub fn swap_remove(&mut self, index: usize, pool: &mut ListPool<T>) {
        let len = self.len(pool);
-        assert!(index < len);
+        debug_assert!(index < len);
        if index == len - 1 {
            self.remove(index, pool);
        } else {
--- a/lib/cretonne/src/entity/mod.rs
+++ b/lib/cretonne/src/entity/mod.rs
@@ -61,7 +61,7 @@ macro_rules! entity_impl {
    ($entity:ident) => {
        impl $crate::entity::EntityRef for $entity {
            fn new(index: usize) -> Self {
-                assert!(index < (::std::u32::MAX as usize));
+                debug_assert!(index < (::std::u32::MAX as usize));
                $entity(index as u32)
            }

--- a/lib/cretonne/src/entity/sparse.rs
+++ b/lib/cretonne/src/entity/sparse.rs
@@ -150,7 +150,7 @@ where

        // There was no previous entry for `key`. Add it to the end of `dense`.
        let idx = self.dense.len();
-        assert!(idx <= u32::MAX as usize, "SparseMap overflow");
+        debug_assert!(idx <= u32::MAX as usize, "SparseMap overflow");
        self.dense.push(value);
        self.sparse[key] = idx as u32;
        None
--- a/lib/cretonne/src/flowgraph.rs
+++ b/lib/cretonne/src/flowgraph.rs
@@ -108,7 +108,7 @@ impl ControlFlowGraph {

    fn compute_ebb(&mut self, func: &Function, ebb: Ebb) {
        for inst in func.layout.ebb_insts(ebb) {
-            match func.dfg[inst].analyze_branch(&func.dfg.value_lists) {
+            match func.dfg.analyze_branch(inst) {
                BranchInfo::SingleDest(dest, _) => {
                    self.add_edge((ebb, inst), dest);
                }
--- a/lib/cretonne/src/ir/dfg.rs
+++ b/lib/cretonne/src/ir/dfg.rs
@@ -217,11 +217,11 @@ impl DataFlowGraph {
    ///
    /// The `dest` value can't be attached to an instruction or EBB.
    pub fn change_to_alias(&mut self, dest: Value, src: Value) {
-        assert!(!self.value_is_attached(dest));
+        debug_assert!(!self.value_is_attached(dest));
        // Try to create short alias chains by finding the original source value.
        // This also avoids the creation of loops.
        let original = self.resolve_aliases(src);
-        assert_ne!(
+        debug_assert_ne!(
            dest,
            original,
            "Aliasing {} to {} would create a loop",
@@ -229,7 +229,7 @@ impl DataFlowGraph {
            src
        );
        let ty = self.value_type(original);
-        assert_eq!(
+        debug_assert_eq!(
            self.value_type(dest),
            ty,
            "Aliasing {} to {} would change its type {} to {}",
@@ -273,7 +273,7 @@ impl DataFlowGraph {
        {
            let original = src;
            let ty = self.value_type(original);
-            assert_eq!(
+            debug_assert_eq!(
                self.value_type(dest),
                ty,
                "Aliasing {} to {} would change its type {} to {}",
@@ -498,9 +498,9 @@ impl DataFlowGraph {
    /// This is a very low-level operation. Usually, instruction results with the correct types are
    /// created automatically. The `res` value must not be attached to anything else.
    pub fn attach_result(&mut self, inst: Inst, res: Value) {
-        assert!(!self.value_is_attached(res));
+        debug_assert!(!self.value_is_attached(res));
        let num = self.results[inst].push(res, &mut self.value_lists);
-        assert!(num <= u16::MAX as usize, "Too many result values");
+        debug_assert!(num <= u16::MAX as usize, "Too many result values");
        let ty = self.value_type(res);
        self.values[res] = ValueData::Inst {
            ty,
@@ -533,7 +533,7 @@ impl DataFlowGraph {
                .expect("Replacing detached result"),
            new_value,
        );
-        assert_eq!(
+        debug_assert_eq!(
            attached,
            old_value,
            "{} wasn't detached from {}",
@@ -547,7 +547,7 @@ impl DataFlowGraph {
    pub fn append_result(&mut self, inst: Inst, ty: Type) -> Value {
        let res = self.values.next_key();
        let num = self.results[inst].push(res, &mut self.value_lists);
-        assert!(num <= u16::MAX as usize, "Too many result values");
+        debug_assert!(num <= u16::MAX as usize, "Too many result values");
        self.make_value(ValueData::Inst {
            ty,
            inst,
@@ -684,7 +684,7 @@ impl DataFlowGraph {
    pub fn append_ebb_param(&mut self, ebb: Ebb, ty: Type) -> Value {
        let param = self.values.next_key();
        let num = self.ebbs[ebb].params.push(param, &mut self.value_lists);
-        assert!(num <= u16::MAX as usize, "Too many parameters on EBB");
+        debug_assert!(num <= u16::MAX as usize, "Too many parameters on EBB");
        self.make_value(ValueData::Param {
            ty,
            num: num as u16,
@@ -761,9 +761,9 @@ impl DataFlowGraph {
    ///
    /// In almost all cases, you should be using `append_ebb_param()` instead of this method.
    pub fn attach_ebb_param(&mut self, ebb: Ebb, param: Value) {
-        assert!(!self.value_is_attached(param));
+        debug_assert!(!self.value_is_attached(param));
        let num = self.ebbs[ebb].params.push(param, &mut self.value_lists);
-        assert!(num <= u16::MAX as usize, "Too many parameters on EBB");
+        debug_assert!(num <= u16::MAX as usize, "Too many parameters on EBB");
        let ty = self.value_type(param);
        self.values[param] = ValueData::Param {
            ty,
@@ -859,7 +859,7 @@ impl DataFlowGraph {
    /// to create invalid values for index padding which may be reassigned later.
    #[cold]
    fn set_value_type_for_parser(&mut self, v: Value, t: Type) {
-        debug_assert!(
+        assert!(
            self.value_type(v) == types::VOID,
            "this function is only for assigning types to previously invalid values"
        );
@@ -882,7 +882,7 @@ impl DataFlowGraph {
    ) -> usize {
        // Get the call signature if this is a function call.
        if let Some(sig) = self.call_signature(inst) {
-            debug_assert_eq!(self.insts[inst].opcode().constraints().fixed_results(), 0);
+            assert_eq!(self.insts[inst].opcode().constraints().fixed_results(), 0);
            for res_idx in 0..self.signatures[sig].returns.len() {
                let ty = self.signatures[sig].returns[res_idx].value_type;
                if let Some(v) = reuse.get(res_idx) {
--- a/lib/cretonne/src/ir/immediates.rs
+++ b/lib/cretonne/src/ir/immediates.rs
@@ -490,7 +490,7 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result<u64, &'static str> {
        significand <<= adjust;
        exponent -= i32::from(adjust);
    }
-    assert_eq!(significand >> t, 1);
+    debug_assert_eq!(significand >> t, 1);

    // Trailing significand excludes the high bit.
    let t_bits = significand & ((1 << t) - 1);
@@ -538,6 +538,17 @@ impl Ieee32 {
        Ieee32(exponent << t)
    }

+    /// Create an `Ieee32` number representing the greatest negative value
+    /// not convertible from f32 to a signed integer with width n.
+    pub fn fcvt_to_sint_negative_overflow<I: Into<i32>>(n: I) -> Ieee32 {
+        let n = n.into();
+        debug_assert!(n < 32);
+        debug_assert!(23 + 1 - n < 32);
+        Self::with_bits(
+            (1u32 << (32 - 1)) | Self::pow2(n - 1).0 | (1u32 << (23 + 1 - n)),
+        )
+    }
+
    /// Return self negated.
    pub fn neg(self) -> Ieee32 {
        Ieee32(self.0 ^ (1 << 31))
@@ -590,6 +601,17 @@ impl Ieee64 {
        Ieee64(exponent << t)
    }

+    /// Create an `Ieee64` number representing the greatest negative value
+    /// not convertible from f64 to a signed integer with width n.
+    pub fn fcvt_to_sint_negative_overflow<I: Into<i64>>(n: I) -> Ieee64 {
+        let n = n.into();
+        debug_assert!(n < 64);
+        debug_assert!(52 + 1 - n < 64);
+        Self::with_bits(
+            (1u64 << (64 - 1)) | Self::pow2(n - 1).0 | (1u64 << (52 + 1 - n)),
+        )
+    }
+
    /// Return self negated.
    pub fn neg(self) -> Ieee64 {
        Ieee64(self.0 ^ (1 << 63))
@@ -858,6 +880,15 @@ mod tests {
        assert_eq!(Ieee32::pow2(1).neg().to_string(), "-0x1.000000p1");
    }

+    #[test]
+    fn fcvt_to_sint_negative_overflow_ieee32() {
+        for n in &[8, 16] {
+            assert_eq!(-((1u32 << (n - 1)) as f32) - 1.0, unsafe {
+                mem::transmute(Ieee32::fcvt_to_sint_negative_overflow(*n))
+            });
+        }
+    }
+
    #[test]
    fn format_ieee64() {
        assert_eq!(Ieee64::with_float(0.0).to_string(), "0.0");
@@ -986,4 +1017,13 @@ mod tests {

        assert_eq!(Ieee64::pow2(1).neg().to_string(), "-0x1.0000000000000p1");
    }
+
+    #[test]
+    fn fcvt_to_sint_negative_overflow_ieee64() {
+        for n in &[8, 16, 32] {
+            assert_eq!(-((1u64 << (n - 1)) as f64) - 1.0, unsafe {
+                mem::transmute(Ieee64::fcvt_to_sint_negative_overflow(*n))
+            });
+        }
+    }
 }
--- a/lib/cretonne/src/ir/instructions.rs
+++ b/lib/cretonne/src/ir/instructions.rs
@@ -561,7 +561,7 @@ impl OpcodeConstraints {
    /// Get the value type of result number `n`, having resolved the controlling type variable to
    /// `ctrl_type`.
    pub fn result_type(self, n: usize, ctrl_type: Type) -> Type {
-        assert!(n < self.fixed_results(), "Invalid result index");
+        debug_assert!(n < self.fixed_results(), "Invalid result index");
        if let ResolvedConstraint::Bound(t) =
            OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type)
        {
@@ -577,7 +577,7 @@ impl OpcodeConstraints {
    /// Unlike results, it is possible for some input values to vary freely within a specific
    /// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant.
    pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint {
-        assert!(
+        debug_assert!(
            n < self.fixed_value_arguments(),
            "Invalid value argument index"
        );
--- a/lib/cretonne/src/ir/layout.rs
+++ b/lib/cretonne/src/ir/layout.rs
@@ -88,7 +88,7 @@ const LOCAL_LIMIT: SequenceNumber = 100 * MINOR_STRIDE;
 // Compute the midpoint between `a` and `b`.
 // Return `None` if the midpoint would be equal to either.
 fn midpoint(a: SequenceNumber, b: SequenceNumber) -> Option<SequenceNumber> {
-    assert!(a < b);
+    debug_assert!(a < b);
    // Avoid integer overflow.
    let m = a + (b - a) / 2;
    if m > a { Some(m) } else { None }
@@ -148,7 +148,7 @@ impl Layout {
    /// Assign a valid sequence number to `ebb` such that the numbers are still monotonic. This may
    /// require renumbering.
    fn assign_ebb_seq(&mut self, ebb: Ebb) {
-        assert!(self.is_ebb_inserted(ebb));
+        debug_assert!(self.is_ebb_inserted(ebb));

        // Get the sequence number immediately before `ebb`, or 0.
        let prev_seq = self.ebbs[ebb]
@@ -334,13 +334,13 @@ impl Layout {

    /// Insert `ebb` as the last EBB in the layout.
    pub fn append_ebb(&mut self, ebb: Ebb) {
-        assert!(
+        debug_assert!(
            !self.is_ebb_inserted(ebb),
            "Cannot append EBB that is already in the layout"
        );
        {
            let node = &mut self.ebbs[ebb];
-            assert!(node.first_inst.is_none() && node.last_inst.is_none());
+            debug_assert!(node.first_inst.is_none() && node.last_inst.is_none());
            node.prev = self.last_ebb.into();
            node.next = None.into();
        }
@@ -355,11 +355,11 @@ impl Layout {

    /// Insert `ebb` in the layout before the existing EBB `before`.
    pub fn insert_ebb(&mut self, ebb: Ebb, before: Ebb) {
-        assert!(
+        debug_assert!(
            !self.is_ebb_inserted(ebb),
            "Cannot insert EBB that is already in the layout"
        );
-        assert!(
+        debug_assert!(
            self.is_ebb_inserted(before),
            "EBB Insertion point not in the layout"
        );
@@ -379,11 +379,11 @@ impl Layout {

    /// Insert `ebb` in the layout *after* the existing EBB `after`.
    pub fn insert_ebb_after(&mut self, ebb: Ebb, after: Ebb) {
-        assert!(
+        debug_assert!(
            !self.is_ebb_inserted(ebb),
            "Cannot insert EBB that is already in the layout"
        );
-        assert!(
+        debug_assert!(
            self.is_ebb_inserted(after),
            "EBB Insertion point not in the layout"
        );
@@ -403,8 +403,8 @@ impl Layout {

    /// Remove `ebb` from the layout.
    pub fn remove_ebb(&mut self, ebb: Ebb) {
-        assert!(self.is_ebb_inserted(ebb), "EBB not in the layout");
-        assert!(self.first_inst(ebb).is_none(), "EBB must be empty.");
+        debug_assert!(self.is_ebb_inserted(ebb), "EBB not in the layout");
+        debug_assert!(self.first_inst(ebb).is_none(), "EBB must be empty.");

        // Clear the `ebb` node and extract links.
        let prev;
@@ -521,8 +521,8 @@ impl Layout {

    /// Append `inst` to the end of `ebb`.
    pub fn append_inst(&mut self, inst: Inst, ebb: Ebb) {
-        assert_eq!(self.inst_ebb(inst), None);
-        assert!(
+        debug_assert_eq!(self.inst_ebb(inst), None);
+        debug_assert!(
            self.is_ebb_inserted(ebb),
            "Cannot append instructions to EBB not in layout"
        );
@@ -532,7 +532,7 @@ impl Layout {
                let inst_node = &mut self.insts[inst];
                inst_node.ebb = ebb.into();
                inst_node.prev = ebb_node.last_inst;
-                assert!(inst_node.next.is_none());
+                debug_assert!(inst_node.next.is_none());
            }
            if ebb_node.first_inst.is_none() {
                ebb_node.first_inst = inst.into();
@@ -566,7 +566,7 @@ impl Layout {

    /// Insert `inst` before the instruction `before` in the same EBB.
    pub fn insert_inst(&mut self, inst: Inst, before: Inst) {
-        assert_eq!(self.inst_ebb(inst), None);
+        debug_assert_eq!(self.inst_ebb(inst), None);
        let ebb = self.inst_ebb(before).expect(
            "Instruction before insertion point not in the layout",
        );
@@ -645,7 +645,7 @@ impl Layout {
        let old_ebb = self.inst_ebb(before).expect(
            "The `before` instruction must be in the layout",
        );
-        assert!(!self.is_ebb_inserted(new_ebb));
+        debug_assert!(!self.is_ebb_inserted(new_ebb));

        // Insert new_ebb after old_ebb.
        let next_ebb = self.ebbs[old_ebb].next;
--- a/lib/cretonne/src/ir/progpoint.rs
+++ b/lib/cretonne/src/ir/progpoint.rs
@@ -19,7 +19,7 @@ pub struct ProgramPoint(u32);
 impl From<Inst> for ProgramPoint {
    fn from(inst: Inst) -> ProgramPoint {
        let idx = inst.index();
-        assert!(idx < (u32::MAX / 2) as usize);
+        debug_assert!(idx < (u32::MAX / 2) as usize);
        ProgramPoint((idx * 2) as u32)
    }
 }
@@ -27,7 +27,7 @@ impl From<Inst> for ProgramPoint {
 impl From<Ebb> for ProgramPoint {
    fn from(ebb: Ebb) -> ProgramPoint {
        let idx = ebb.index();
-        assert!(idx < (u32::MAX / 2) as usize);
+        debug_assert!(idx < (u32::MAX / 2) as usize);
        ProgramPoint((idx * 2 + 1) as u32)
    }
 }
--- a/lib/cretonne/src/ir/stackslot.rs
+++ b/lib/cretonne/src/ir/stackslot.rs
@@ -41,9 +41,9 @@ pub enum StackSlotKind {
    /// A spill slot. This is a stack slot created by the register allocator.
    SpillSlot,

-    /// A local variable. This is a chunk of local stack memory for use by the `stack_load` and
-    /// `stack_store` instructions.
-    Local,
+    /// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load`
+    /// and `stack_store` instructions.
+    ExplicitSlot,

    /// An incoming function argument.
    ///
@@ -72,7 +72,7 @@ impl FromStr for StackSlotKind {
    fn from_str(s: &str) -> Result<StackSlotKind, ()> {
        use self::StackSlotKind::*;
        match s {
-            "local" => Ok(Local),
+            "explicit_slot" => Ok(ExplicitSlot),
            "spill_slot" => Ok(SpillSlot),
            "incoming_arg" => Ok(IncomingArg),
            "outgoing_arg" => Ok(OutgoingArg),
@@ -86,7 +86,7 @@ impl fmt::Display for StackSlotKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::StackSlotKind::*;
        f.write_str(match *self {
-            Local => "local",
+            ExplicitSlot => "explicit_slot",
            SpillSlot => "spill_slot",
            IncomingArg => "incoming_arg",
            OutgoingArg => "outgoing_arg",
@@ -112,7 +112,7 @@ pub struct StackSlotData {
    ///
    /// For `OutgoingArg` stack slots, the offset is relative to the current function's stack
    /// pointer immediately before the call.
-    pub offset: StackOffset,
+    pub offset: Option<StackOffset>,
 }

 impl StackSlotData {
@@ -121,7 +121,7 @@ impl StackSlotData {
        StackSlotData {
            kind,
            size,
-            offset: 0,
+            offset: None,
        }
    }

@@ -139,8 +139,8 @@ impl StackSlotData {
 impl fmt::Display for StackSlotData {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{} {}", self.kind, self.size)?;
-        if self.offset != 0 {
-            write!(f, ", offset {}", self.offset)?;
+        if let Some(offset) = self.offset {
+            write!(f, ", offset {}", offset)?;
        }
        Ok(())
    }
@@ -205,7 +205,7 @@ impl StackSlots {

    /// Set the offset of a stack slot.
    pub fn set_offset(&mut self, ss: StackSlot, offset: StackOffset) {
-        self.slots[ss].offset = offset;
+        self.slots[ss].offset = Some(offset);
    }

    /// Get an iterator over all the stack slot keys.
@@ -245,8 +245,8 @@ impl StackSlots {
    /// Create a stack slot representing an incoming function argument.
    pub fn make_incoming_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
        let mut data = StackSlotData::new(StackSlotKind::IncomingArg, ty.bytes());
-        assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
-        data.offset = offset;
+        debug_assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
+        data.offset = Some(offset);
        self.push(data)
    }

@@ -262,7 +262,7 @@ impl StackSlots {

        // Look for an existing outgoing stack slot with the same offset and size.
        let inspos = match self.outgoing.binary_search_by_key(&(offset, size), |&ss| {
-            (self[ss].offset, self[ss].size)
+            (self[ss].offset.unwrap(), self[ss].size)
        }) {
            Ok(idx) => return self.outgoing[idx],
            Err(idx) => idx,
@@ -270,8 +270,8 @@ impl StackSlots {

        // No existing slot found. Make one and insert it into `outgoing`.
        let mut data = StackSlotData::new(StackSlotKind::OutgoingArg, size);
-        assert!(offset <= StackOffset::max_value() - size as StackOffset);
-        data.offset = offset;
+        debug_assert!(offset <= StackOffset::max_value() - size as StackOffset);
+        data.offset = Some(offset);
        let ss = self.slots.push(data);
        self.outgoing.insert(inspos, ss);
        ss
@@ -346,13 +346,13 @@ mod tests {
        let ss1 = sss.get_outgoing_arg(types::I32, 4);
        let ss2 = sss.get_outgoing_arg(types::I64, 8);

-        assert_eq!(sss[ss0].offset, 8);
+        assert_eq!(sss[ss0].offset, Some(8));
        assert_eq!(sss[ss0].size, 4);

-        assert_eq!(sss[ss1].offset, 4);
+        assert_eq!(sss[ss1].offset, Some(4));
        assert_eq!(sss[ss1].size, 4);

-        assert_eq!(sss[ss2].offset, 8);
+        assert_eq!(sss[ss2].offset, Some(8));
        assert_eq!(sss[ss2].size, 8);

        assert_eq!(sss.get_outgoing_arg(types::I32, 8), ss0);
@@ -368,7 +368,7 @@ mod tests {
        assert_eq!(slot.alignment(8), 8);
        assert_eq!(slot.alignment(16), 8);

-        let slot2 = StackSlotData::new(StackSlotKind::Local, 24);
+        let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);

        assert_eq!(slot2.alignment(4), 4);
        assert_eq!(slot2.alignment(8), 8);
--- a/lib/cretonne/src/isa/arm32/settings.rs
+++ b/lib/cretonne/src/isa/arm32/settings.rs
@@ -5,5 +5,5 @@ use std::fmt;

 // Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public
 // `Flags` struct with an impl for all of the settings defined in
-// `lib/cretonne/meta/cretonne/settings.py`.
+// `lib/cretonne/meta/isa/arm32/settings.py`.
 include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs"));
--- a/lib/cretonne/src/isa/arm64/settings.rs
+++ b/lib/cretonne/src/isa/arm64/settings.rs
@@ -5,5 +5,5 @@ use std::fmt;

 // Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public
 // `Flags` struct with an impl for all of the settings defined in
-// `lib/cretonne/meta/cretonne/settings.py`.
+// `lib/cretonne/meta/isa/arm64/settings.py`.
 include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));
--- a/lib/cretonne/src/isa/constraints.rs
+++ b/lib/cretonne/src/isa/constraints.rs
@@ -13,6 +13,7 @@ use ir::{Function, ValueLoc, Inst};
 use regalloc::RegDiversions;

 /// Register constraint for a single value operand or instruction result.
+#[derive(PartialEq, Debug)]
 pub struct OperandConstraint {
    /// The kind of constraint.
    pub kind: ConstraintKind,
@@ -53,7 +54,7 @@ impl OperandConstraint {
 }

 /// The different kinds of operand constraints.
-#[derive(Clone, Copy, PartialEq, Eq)]
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
 pub enum ConstraintKind {
    /// This operand or result must be a register from the given register class.
    Reg,
@@ -89,7 +90,7 @@ pub enum ConstraintKind {
 }

 /// Value operand constraints for an encoding recipe.
-#[derive(Clone)]
+#[derive(PartialEq, Clone)]
 pub struct RecipeConstraints {
    /// Constraints for the instruction's fixed value operands.
    ///
@@ -160,7 +161,7 @@ impl RecipeConstraints {
 /// - Intel uses the address of the instruction following the branch, `origin = 2` for a 2-byte
 ///   branch instruction.
 /// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`.
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Debug)]
 pub struct BranchRange {
    /// Offset in bytes from the address of the branch instruction to the origin used for computing
    /// the branch displacement. This is the destination of a branch that encodes a 0 displacement.
--- a/lib/cretonne/src/isa/enc_tables.rs
+++ b/lib/cretonne/src/isa/enc_tables.rs
@@ -225,7 +225,7 @@ impl<'a> Encodings<'a> {
        self.legalize_actions[self.legalize as usize]
    }

-    /// Check if the `rpred` recipe predicate s satisfied.
+    /// Check if the `rpred` recipe predicate is satisfied.
    fn check_recipe(&self, rpred: RecipePredicate) -> bool {
        match rpred {
            Some(p) => p(self.isa_preds, self.inst),
--- a/lib/cretonne/src/isa/intel/abi.rs
+++ b/lib/cretonne/src/isa/intel/abi.rs
@@ -107,7 +107,7 @@ impl ArgAssigner for Args {
        // Assign a stack location.
        let loc = ArgumentLoc::Stack(self.offset as i32);
        self.offset += self.pointer_bytes;
-        assert!(self.offset <= i32::MAX as u32);
+        debug_assert!(self.offset <= i32::MAX as u32);
        loc.into()
    }
 }
@@ -180,15 +180,13 @@ pub fn spiderwasm_prologue_epilogue(
    func: &mut ir::Function,
    isa: &TargetIsa,
 ) -> result::CtonResult {
-    let (word_size, stack_align) = if isa.flags().is_64bit() {
-        (8, 16)
-    } else {
-        (4, 4)
-    };
+    // Spiderwasm on 32-bit x86 always aligns its stack pointer to 16 bytes.
+    let stack_align = 16;
+    let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
    let bytes = StackSize::from(isa.flags().spiderwasm_prologue_words()) * word_size;

    let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
-    ss.offset = -(bytes as StackOffset);
+    ss.offset = Some(-(bytes as StackOffset));
    func.stack_slots.push(ss);

    layout_stack(&mut func.stack_slots, stack_align)?;
@@ -197,11 +195,10 @@ pub fn spiderwasm_prologue_epilogue(

 /// Insert a System V-compatible prologue and epilogue.
 pub fn native_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
-    let (word_size, stack_align) = if isa.flags().is_64bit() {
-        (8, 16)
-    } else {
-        (4, 4)
-    };
+    // The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
+    // newer versions use a 16-byte aligned stack pointer.
+    let stack_align = 16;
+    let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
    let csr_type = if isa.flags().is_64bit() {
        ir::types::I64
    } else {
@@ -220,11 +217,11 @@ pub fn native_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> res
    func.create_stack_slot(ir::StackSlotData {
        kind: ir::StackSlotKind::IncomingArg,
        size: csr_stack_size as u32,
-        offset: -csr_stack_size,
+        offset: Some(-csr_stack_size),
    });

    let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
-    let local_stack_size = (total_stack_size - csr_stack_size) as i64;
+    let local_stack_size = i64::from(total_stack_size - csr_stack_size);

    // Add CSRs to function signature
    let fp_arg = ir::AbiParam::special_reg(
--- a/lib/cretonne/src/isa/intel/enc_tables.rs
+++ b/lib/cretonne/src/isa/intel/enc_tables.rs
@@ -1,5 +1,6 @@
 //! Encoding tables for Intel ISAs.

+use bitset::BitSet;
 use cursor::{Cursor, FuncCursor};
 use flowgraph::ControlFlowGraph;
 use ir::{self, InstBuilder};
@@ -375,13 +376,22 @@ fn expand_fcvt_to_sint(
    let mut overflow_cc = FloatCC::LessThan;
    let output_bits = ty.lane_bits();
    let flimit = match xty {
-        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(output_bits - 1).neg()),
+        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
+        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+        ir::types::F32 => {
+            pos.ins().f32const(if output_bits < 32 {
+                overflow_cc = FloatCC::LessThanOrEqual;
+                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
+            } else {
+                Ieee32::pow2(output_bits - 1).neg()
+            })
+        }
        ir::types::F64 => {
            // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
            // there are values less than -2^(N-1) that convert correctly to INT_MIN.
            pos.ins().f64const(if output_bits < 64 {
                overflow_cc = FloatCC::LessThanOrEqual;
-                Ieee64::with_float(-((1u64 << (output_bits - 1)) as f64) - 1.0)
+                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee64::pow2(output_bits - 1).neg()
            })
@@ -393,8 +403,8 @@ fn expand_fcvt_to_sint(

    // Finally, we could have a positive value that is too large.
    let fzero = match xty {
-        ir::types::F32 => pos.ins().f32const(Ieee32::with_float(0.0)),
-        ir::types::F64 => pos.ins().f64const(Ieee64::with_float(0.0)),
+        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
+        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
        _ => panic!("Can't convert {}", xty),
    };
    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
--- a/lib/cretonne/src/isa/intel/settings.rs
+++ b/lib/cretonne/src/isa/intel/settings.rs
@@ -5,7 +5,7 @@ use std::fmt;

 // Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public
 // `Flags` struct with an impl for all of the settings defined in
-// `lib/cretonne/meta/cretonne/settings.py`.
+// `lib/cretonne/meta/isa/intel/settings.py`.
 include!(concat!(env!("OUT_DIR"), "/settings-intel.rs"));

 #[cfg(test)]
--- a/lib/cretonne/src/isa/mod.rs
+++ b/lib/cretonne/src/isa/mod.rs
@@ -252,7 +252,7 @@ pub trait TargetIsa: fmt::Display {
        if func.signature.call_conv == ir::CallConv::SpiderWASM {
            let bytes = StackSize::from(self.flags().spiderwasm_prologue_words()) * word_size;
            let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
-            ss.offset = -(bytes as StackOffset);
+            ss.offset = Some(-(bytes as StackOffset));
            func.stack_slots.push(ss);
        }

--- a/lib/cretonne/src/isa/riscv/abi.rs
+++ b/lib/cretonne/src/isa/riscv/abi.rs
@@ -80,7 +80,7 @@ impl ArgAssigner for Args {
            // Assign a stack location.
            let loc = ArgumentLoc::Stack(self.offset as i32);
            self.offset += self.pointer_bytes;
-            assert!(self.offset <= i32::MAX as u32);
+            debug_assert!(self.offset <= i32::MAX as u32);
            loc.into()
        }
    }
--- a/lib/cretonne/src/isa/riscv/binemit.rs
+++ b/lib/cretonne/src/isa/riscv/binemit.rs
@@ -106,7 +106,7 @@ fn put_i<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit,
 ///
 /// Encoding bits: `opcode[6:2] | (funct3 << 5)`
 fn put_u<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
-    let bits = bits as u32;
+    let bits = u32::from(bits);
    let opcode5 = bits & 0x1f;
    let rd = u32::from(rd) & 0x1f;

@@ -133,7 +133,7 @@ fn put_sb<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit
    let rs1 = u32::from(rs1) & 0x1f;
    let rs2 = u32::from(rs2) & 0x1f;

-    assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
+    debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
    let imm = imm as u32;

    // 0-6: opcode
@@ -164,7 +164,7 @@ fn put_uj<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS
    let opcode5 = bits & 0x1f;
    let rd = u32::from(rd) & 0x1f;

-    assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
+    debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
    let imm = imm as u32;

    // 0-6: opcode
--- a/lib/cretonne/src/isa/riscv/registers.py
+++ b/lib/cretonne/src/isa/riscv/registers.py
@@ -1 +0,0 @@
-
--- a/lib/cretonne/src/isa/riscv/settings.rs
+++ b/lib/cretonne/src/isa/riscv/settings.rs
@@ -5,7 +5,7 @@ use std::fmt;

 // Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public
 // `Flags` struct with an impl for all of the settings defined in
-// `lib/cretonne/meta/cretonne/settings.py`.
+// `lib/cretonne/meta/isa/riscv/settings.py`.
 include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs"));

 #[cfg(test)]
--- a/lib/cretonne/src/isa/stack.rs
+++ b/lib/cretonne/src/isa/stack.rs
@@ -41,12 +41,12 @@ impl StackRef {
        let slot = &frame[ss];
        let offset = if slot.kind == StackSlotKind::OutgoingArg {
            // Outgoing argument slots have offsets relative to our stack pointer.
-            slot.offset
+            slot.offset.unwrap()
        } else {
            // All other slots have offsets relative to our caller's stack frame.
            // Offset where SP is pointing. (All ISAs have stacks growing downwards.)
            let sp_offset = -(size as StackOffset);
-            slot.offset - sp_offset
+            slot.offset.unwrap() - sp_offset
        };
        StackRef {
            base: StackBase::SP,
--- a/lib/cretonne/src/legalizer/boundary.rs
+++ b/lib/cretonne/src/legalizer/boundary.rs
@@ -86,15 +86,15 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) {
                ArgumentPurpose::FramePointer => {}
                ArgumentPurpose::CalleeSaved => {}
                ArgumentPurpose::StructReturn => {
-                    assert!(!has_sret, "Multiple sret arguments found");
+                    debug_assert!(!has_sret, "Multiple sret arguments found");
                    has_sret = true;
                }
                ArgumentPurpose::VMContext => {
-                    assert!(!has_vmctx, "Multiple vmctx arguments found");
+                    debug_assert!(!has_vmctx, "Multiple vmctx arguments found");
                    has_vmctx = true;
                }
                ArgumentPurpose::SignatureId => {
-                    assert!(!has_sigid, "Multiple sigid arguments found");
+                    debug_assert!(!has_sigid, "Multiple sigid arguments found");
                    has_sigid = true;
                }
                _ => panic!("Unexpected special-purpose arg {}", abi_type),
@@ -104,7 +104,7 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) {
            // Compute the value we want for `arg` from the legalized ABI parameters.
            let mut get_arg = |func: &mut Function, ty| {
                let abi_type = func.signature.params[abi_arg];
-                assert_eq!(
+                debug_assert_eq!(
                    abi_type.purpose,
                    ArgumentPurpose::Normal,
                    "Can't legalize special-purpose argument"
@@ -119,7 +119,7 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) {
            let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg);
            // The old `arg` is no longer an attached EBB argument, but there are probably still
            // uses of the value.
-            assert_eq!(pos.func.dfg.resolve_aliases(arg), converted);
+            debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted);
        }
    }

@@ -139,19 +139,19 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) {
            }
            // These can be meaningfully added by `legalize_signature()`.
            ArgumentPurpose::Link => {
-                assert!(!has_link, "Multiple link parameters found");
+                debug_assert!(!has_link, "Multiple link parameters found");
                has_link = true;
            }
            ArgumentPurpose::StructReturn => {
-                assert!(!has_sret, "Multiple sret parameters found");
+                debug_assert!(!has_sret, "Multiple sret parameters found");
                has_sret = true;
            }
            ArgumentPurpose::VMContext => {
-                assert!(!has_vmctx, "Multiple vmctx parameters found");
+                debug_assert!(!has_vmctx, "Multiple vmctx parameters found");
                has_vmctx = true;
            }
            ArgumentPurpose::SignatureId => {
-                assert!(!has_sigid, "Multiple sigid parameters found");
+                debug_assert!(!has_sigid, "Multiple sigid parameters found");
                has_sigid = true;
            }
        }
@@ -181,7 +181,7 @@ where
    // We theoretically allow for call instructions that return a number of fixed results before
    // the call return values. In practice, it doesn't happen.
    let fixed_results = pos.func.dfg[call].opcode().constraints().fixed_results();
-    assert_eq!(fixed_results, 0, "Fixed results  on calls not supported");
+    debug_assert_eq!(fixed_results, 0, "Fixed results on calls not supported");

    let results = pos.func.dfg.detach_results(call);
    let mut next_res = 0;
@@ -210,7 +210,7 @@ where
                }
            };
            let v = convert_from_abi(pos, res_type, Some(res), &mut get_res);
-            assert_eq!(pos.func.dfg.resolve_aliases(res), v);
+            debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v);
        }
    }

@@ -239,7 +239,7 @@ where
    let arg_type = match get_arg(pos.func, ty) {
        Ok(v) => {
            debug_assert_eq!(pos.func.dfg.value_type(v), ty);
-            assert_eq!(into_result, None);
+            debug_assert_eq!(into_result, None);
            return v;
        }
        Err(t) => t,
@@ -275,7 +275,7 @@ where
        }
        // Construct a `ty` by bit-casting from an integer type.
        ValueConversion::IntBits => {
-            assert!(!ty.is_int());
+            debug_assert!(!ty.is_int());
            let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
            let arg = convert_from_abi(pos, abi_ty, None, get_arg);
            pos.ins().with_results([into_result]).bitcast(ty, arg)
@@ -341,7 +341,7 @@ fn convert_to_abi<PutArg>(
            convert_to_abi(pos, cfg, hi, put_arg);
        }
        ValueConversion::IntBits => {
-            assert!(!ty.is_int());
+            debug_assert!(!ty.is_int());
            let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
            let arg = pos.ins().bitcast(abi_ty, value);
            convert_to_abi(pos, cfg, arg, put_arg);
@@ -556,7 +556,7 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph
    legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
        func.signature.returns[abi_arg]
    });
-    assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args);
+    debug_assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args);

    // Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
    // the legalized signature. These values should simply be propagated from the entry block
--- a/lib/cretonne/src/legalizer/globalvar.rs
+++ b/lib/cretonne/src/legalizer/globalvar.rs
@@ -18,7 +18,7 @@ pub fn expand_global_addr(
    // Unpack the instruction.
    let gv = match func.dfg[inst] {
        ir::InstructionData::UnaryGlobalVar { opcode, global_var } => {
-            assert_eq!(opcode, ir::Opcode::GlobalAddr);
+            debug_assert_eq!(opcode, ir::Opcode::GlobalAddr);
            global_var
        }
        _ => panic!("Wanted global_addr: {}", func.dfg.display_inst(inst, None)),
--- a/lib/cretonne/src/legalizer/heap.rs
+++ b/lib/cretonne/src/legalizer/heap.rs
@@ -24,7 +24,7 @@ pub fn expand_heap_addr(
            arg,
            imm,
        } => {
-            assert_eq!(opcode, ir::Opcode::HeapAddr);
+            debug_assert_eq!(opcode, ir::Opcode::HeapAddr);
            (heap, arg, imm.into())
        }
        _ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)),
--- a/lib/cretonne/src/legalizer/mod.rs
+++ b/lib/cretonne/src/legalizer/mod.rs
@@ -107,7 +107,7 @@ pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, is
 }

 // Include legalization patterns that were generated by `gen_legalizer.py` from the `XForms` in
-// `meta/cretonne/legalize.py`.
+// `lib/cretonne/meta/base/legalize.py`.
 //
 // Concretely, this defines private functions `narrow()`, and `expand()`.
 include!(concat!(env!("OUT_DIR"), "/legalizer.rs"));
@@ -248,7 +248,7 @@ fn expand_fconst(
    _isa: &TargetIsa,
 ) {
    let ty = func.dfg.value_type(func.dfg.first_result(inst));
-    assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty);
+    debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty);

    // In the future, we may want to generate constant pool entries for these constants, but for
    // now use an `iconst` and a bit cast.
--- a/lib/cretonne/src/legalizer/split.rs
+++ b/lib/cretonne/src/legalizer/split.rs
@@ -128,7 +128,7 @@ fn split_any(
    while let Some(repair) = repairs.pop() {
        for (_, inst) in cfg.pred_iter(repair.ebb) {
            let branch_opc = pos.func.dfg[inst].opcode();
-            assert!(
+            debug_assert!(
                branch_opc.is_branch(),
                "Predecessor not a branch: {}",
                pos.func.dfg.display_inst(inst, None)
@@ -199,7 +199,7 @@ fn split_value(
            // This is an instruction result. See if the value was created by a `concat`
            // instruction.
            if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] {
-                assert_eq!(num, 0);
+                debug_assert_eq!(num, 0);
                if opcode == concat {
                    reuse = Some((args[0], args[1]));
                }
--- a/lib/cretonne/src/lib.rs
+++ b/lib/cretonne/src/lib.rs
@@ -1,5 +1,8 @@
 //! Cretonne code generation library.
-#![deny(missing_docs)]
+
+#![deny(missing_docs,
+        trivial_numeric_casts,
+        unused_extern_crates)]

 // Turns on alloc feature if no_std
 #![cfg_attr(not(feature = "std"), no_std)]
@@ -46,11 +49,13 @@ mod abi;
 mod bitset;
 mod constant_hash;
 mod context;
+mod divconst_magic_numbers;
 mod iterators;
 mod legalizer;
 mod licm;
 mod partition_slice;
 mod predicates;
+mod preopt;
 mod ref_slice;
 mod regalloc;
 mod scoped_hash_map;
--- a/lib/cretonne/src/predicates.rs
+++ b/lib/cretonne/src/predicates.rs
@@ -1,7 +1,7 @@
 //! Predicate functions for testing instruction fields.
 //!
 //! This module defines functions that are used by the instruction predicates defined by
-//! `lib/cretonne/meta/cretonne/predicates.py` classes.
+//! `lib/cretonne/meta/cdsl/predicates.py` classes.
 //!
 //! The predicates the operate on integer fields use `Into<i64>` as a shared trait bound. This
 //! bound is implemented by all the native integer types as well as `Imm64`.
--- a/lib/cretonne/src/preopt.rs
+++ b/lib/cretonne/src/preopt.rs
@@ -0,0 +1,521 @@
+//! A pre-legalization rewriting pass.
+
+#![allow(non_snake_case)]
+
+use cursor::{Cursor, FuncCursor};
+use ir::dfg::ValueDef;
+use ir::{Function, InstructionData, Value, DataFlowGraph, InstBuilder, Type};
+use ir::Inst;
+use ir::types::{I32, I64};
+use ir::instructions::Opcode;
+use divconst_magic_numbers::{MU32, MU64, MS32, MS64};
+use divconst_magic_numbers::{magicU32, magicU64, magicS32, magicS64};
+use timing;
+
+
+//----------------------------------------------------------------------
+//
+// Pattern-match helpers and transformation for div and rem by constants.
+
+// Simple math helpers
+
+// If `x` is a power of two, or the negation thereof, return `Some((is_negative, k))`,
+// where `is_negative` is `x < 0` and `abs(x) == 2^k`. Otherwise return `None`.
+#[inline]
+fn isPowerOf2_S32(x: i32) -> Option<(bool, u32)> {
+    // We have to special-case this because abs(x) isn't representable.
+    if x == -0x8000_0000 {
+        return Some((true, 31));
+    }
+    let abs_x = i32::wrapping_abs(x) as u32;
+    if abs_x.is_power_of_two() {
+        return Some((x < 0, abs_x.trailing_zeros()));
+    }
+    None
+}
+
+// Same comments as for isPowerOf2_S32 apply.
+#[inline]
+fn isPowerOf2_S64(x: i64) -> Option<(bool, u32)> {
+    // We have to special-case this because abs(x) isn't representable.
+    if x == -0x8000_0000_0000_0000 {
+        return Some((true, 63));
+    }
+    let abs_x = i64::wrapping_abs(x) as u64;
+    if abs_x.is_power_of_two() {
+        return Some((x < 0, abs_x.trailing_zeros()));
+    }
+    None
+}
+
+#[derive(Debug)]
+enum DivRemByConstInfo {
+    DivU32(Value, u32), // In all cases, the arguments are:
+    DivU64(Value, u64), // left operand, right operand
+    DivS32(Value, i32),
+    DivS64(Value, i64),
+    RemU32(Value, u32),
+    RemU64(Value, u64),
+    RemS32(Value, i32),
+    RemS64(Value, i64),
+}
+
+// Possibly create a DivRemByConstInfo from the given components, by
+// figuring out which, if any, of the 8 cases apply, and also taking care to
+// sanity-check the immediate.
+fn package_up_divrem_info(
+    argL: Value,
+    argL_ty: Type,
+    argRs: i64,
+    isSigned: bool,
+    isRem: bool,
+) -> Option<DivRemByConstInfo> {
+    let argRu: u64 = argRs as u64;
+    if !isSigned && argL_ty == I32 && argRu < 0x1_0000_0000 {
+        let con = if isRem {
+            DivRemByConstInfo::RemU32
+        } else {
+            DivRemByConstInfo::DivU32
+        };
+        return Some(con(argL, argRu as u32));
+    }
+    if !isSigned && argL_ty == I64 {
+        // unsigned 64, no range constraint
+        let con = if isRem {
+            DivRemByConstInfo::RemU64
+        } else {
+            DivRemByConstInfo::DivU64
+        };
+        return Some(con(argL, argRu));
+    }
+    if isSigned && argL_ty == I32 && (argRu <= 0x7fff_ffff || argRu >= 0xffff_ffff_8000_0000) {
+        let con = if isRem {
+            DivRemByConstInfo::RemS32
+        } else {
+            DivRemByConstInfo::DivS32
+        };
+        return Some(con(argL, argRu as i32));
+    }
+    if isSigned && argL_ty == I64 {
+        // signed 64, no range constraint
+        let con = if isRem {
+            DivRemByConstInfo::RemS64
+        } else {
+            DivRemByConstInfo::DivS64
+        };
+        return Some(con(argL, argRu as i64));
+    }
+    None
+}
+
+// Examine `inst` to see if it is a div or rem by a constant, and if so
+// return the operands, signedness, operation size and div-vs-rem-ness in a
+// handy bundle.
+fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
+    let idata: &InstructionData = &dfg[inst];
+
+    if let &InstructionData::BinaryImm { opcode, arg, imm } = idata {
+        let (isSigned, isRem) = match opcode {
+            Opcode::UdivImm => (false, false),
+            Opcode::UremImm => (false, true),
+            Opcode::SdivImm => (true, false),
+            Opcode::SremImm => (true, true),
+            _other => return None,
+        };
+        // Pull the operation size (type) from the left arg
+        let argL_ty = dfg.value_type(arg);
+        return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem);
+    }
+
+    // TODO: should we actually bother to do this (that is, manually match
+    // the case that the second argument is an iconst)? Or should we assume
+    // that some previous constant propagation pass has pushed all such
+    // immediates to their use points, creating BinaryImm instructions
+    // instead? For now we take the conservative approach.
+    if let &InstructionData::Binary { opcode, args } = idata {
+        let (isSigned, isRem) = match opcode {
+            Opcode::Udiv => (false, false),
+            Opcode::Urem => (false, true),
+            Opcode::Sdiv => (true, false),
+            Opcode::Srem => (true, true),
+            _other => return None,
+        };
+        let argR: Value = args[1];
+        if let Some(simm64) = get_const(argR, dfg) {
+            let argL: Value = args[0];
+            // Pull the operation size (type) from the left arg
+            let argL_ty = dfg.value_type(argL);
+            return package_up_divrem_info(argL, argL_ty, simm64, isSigned, isRem);
+        }
+    }
+
+    None
+}
+
+// Actually do the transformation given a bundle containing the relevant
+// information. `divrem_info` describes a div or rem by a constant, that
+// `pos` currently points at, and `inst` is the associated instruction.
+// `inst` is replaced by a sequence of other operations that calculate the
+// same result. Note that there are various `divrem_info` cases where we
+// cannot do any transformation, in which case `inst` is left unchanged.
+fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) {
+    let isRem = match *divrem_info {
+        DivRemByConstInfo::DivU32(_, _) |
+        DivRemByConstInfo::DivU64(_, _) |
+        DivRemByConstInfo::DivS32(_, _) |
+        DivRemByConstInfo::DivS64(_, _) => false,
+        DivRemByConstInfo::RemU32(_, _) |
+        DivRemByConstInfo::RemU64(_, _) |
+        DivRemByConstInfo::RemS32(_, _) |
+        DivRemByConstInfo::RemS64(_, _) => true,
+    };
+
+    match divrem_info {
+
+        // -------------------- U32 --------------------
+
+        // U32 div, rem by zero: ignore
+        &DivRemByConstInfo::DivU32(_n1, 0) |
+        &DivRemByConstInfo::RemU32(_n1, 0) => {}
+
+        // U32 div by 1: identity
+        // U32 rem by 1: zero
+        &DivRemByConstInfo::DivU32(n1, 1) |
+        &DivRemByConstInfo::RemU32(n1, 1) => {
+            if isRem {
+                pos.func.dfg.replace(inst).iconst(I32, 0);
+            } else {
+                pos.func.dfg.replace(inst).copy(n1);
+            }
+        }
+
+        // U32 div, rem by a power-of-2
+        &DivRemByConstInfo::DivU32(n1, d) |
+        &DivRemByConstInfo::RemU32(n1, d) if d.is_power_of_two() => {
+            debug_assert!(d >= 2);
+            // compute k where d == 2^k
+            let k = d.trailing_zeros();
+            debug_assert!(k >= 1 && k <= 31);
+            if isRem {
+                let mask = (1u64 << k) - 1;
+                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
+            } else {
+                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
+            }
+        }
+
+        // U32 div, rem by non-power-of-2
+        &DivRemByConstInfo::DivU32(n1, d) |
+        &DivRemByConstInfo::RemU32(n1, d) => {
+            debug_assert!(d >= 3);
+            let MU32 {
+                mulBy,
+                doAdd,
+                shiftBy,
+            } = magicU32(d);
+            let qf; // final quotient
+            let q0 = pos.ins().iconst(I32, mulBy as i64);
+            let q1 = pos.ins().umulhi(n1, q0);
+            if doAdd {
+                debug_assert!(shiftBy >= 1 && shiftBy <= 32);
+                let t1 = pos.ins().isub(n1, q1);
+                let t2 = pos.ins().ushr_imm(t1, 1);
+                let t3 = pos.ins().iadd(t2, q1);
+                // I never found any case where shiftBy == 1 here.
+                // So there's no attempt to fold out a zero shift.
+                debug_assert!(shiftBy != 1);
+                qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64);
+            } else {
+                debug_assert!(shiftBy >= 0 && shiftBy <= 31);
+                // Whereas there are known cases here for shiftBy == 0.
+                if shiftBy > 0 {
+                    qf = pos.ins().ushr_imm(q1, shiftBy as i64);
+                } else {
+                    qf = q1;
+                }
+            }
+            // Now qf holds the final quotient. If necessary calculate the
+            // remainder instead.
+            if isRem {
+                let tt = pos.ins().imul_imm(qf, d as i64);
+                pos.func.dfg.replace(inst).isub(n1, tt);
+            } else {
+                pos.func.dfg.replace(inst).copy(qf);
+            }
+        }
+
+        // -------------------- U64 --------------------
+
+        // U64 div, rem by zero: ignore
+        &DivRemByConstInfo::DivU64(_n1, 0) |
+        &DivRemByConstInfo::RemU64(_n1, 0) => {}
+
+        // U64 div by 1: identity
+        // U64 rem by 1: zero
+        &DivRemByConstInfo::DivU64(n1, 1) |
+        &DivRemByConstInfo::RemU64(n1, 1) => {
+            if isRem {
+                pos.func.dfg.replace(inst).iconst(I64, 0);
+            } else {
+                pos.func.dfg.replace(inst).copy(n1);
+            }
+        }
+
+        // U64 div, rem by a power-of-2
+        &DivRemByConstInfo::DivU64(n1, d) |
+        &DivRemByConstInfo::RemU64(n1, d) if d.is_power_of_two() => {
+            debug_assert!(d >= 2);
+            // compute k where d == 2^k
+            let k = d.trailing_zeros();
+            debug_assert!(k >= 1 && k <= 63);
+            if isRem {
+                let mask = (1u64 << k) - 1;
+                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
+            } else {
+                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
+            }
+        }
+
+        // U64 div, rem by non-power-of-2
+        &DivRemByConstInfo::DivU64(n1, d) |
+        &DivRemByConstInfo::RemU64(n1, d) => {
+            debug_assert!(d >= 3);
+            let MU64 {
+                mulBy,
+                doAdd,
+                shiftBy,
+            } = magicU64(d);
+            let qf; // final quotient
+            let q0 = pos.ins().iconst(I64, mulBy as i64);
+            let q1 = pos.ins().umulhi(n1, q0);
+            if doAdd {
+                debug_assert!(shiftBy >= 1 && shiftBy <= 64);
+                let t1 = pos.ins().isub(n1, q1);
+                let t2 = pos.ins().ushr_imm(t1, 1);
+                let t3 = pos.ins().iadd(t2, q1);
+                // I never found any case where shiftBy == 1 here.
+                // So there's no attempt to fold out a zero shift.
+                debug_assert!(shiftBy != 1);
+                qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64);
+            } else {
+                debug_assert!(shiftBy >= 0 && shiftBy <= 63);
+                // Whereas there are known cases here for shiftBy == 0.
+                if shiftBy > 0 {
+                    qf = pos.ins().ushr_imm(q1, shiftBy as i64);
+                } else {
+                    qf = q1;
+                }
+            }
+            // Now qf holds the final quotient. If necessary calculate the
+            // remainder instead.
+            if isRem {
+                let tt = pos.ins().imul_imm(qf, d as i64);
+                pos.func.dfg.replace(inst).isub(n1, tt);
+            } else {
+                pos.func.dfg.replace(inst).copy(qf);
+            }
+        }
+
+        // -------------------- S32 --------------------
+
+        // S32 div, rem by zero or -1: ignore
+        &DivRemByConstInfo::DivS32(_n1, -1) |
+        &DivRemByConstInfo::RemS32(_n1, -1) |
+        &DivRemByConstInfo::DivS32(_n1, 0) |
+        &DivRemByConstInfo::RemS32(_n1, 0) => {}
+
+        // S32 div by 1: identity
+        // S32 rem by 1: zero
+        &DivRemByConstInfo::DivS32(n1, 1) |
+        &DivRemByConstInfo::RemS32(n1, 1) => {
+            if isRem {
+                pos.func.dfg.replace(inst).iconst(I32, 0);
+            } else {
+                pos.func.dfg.replace(inst).copy(n1);
+            }
+        }
+
+        &DivRemByConstInfo::DivS32(n1, d) |
+        &DivRemByConstInfo::RemS32(n1, d) => {
+            if let Some((isNeg, k)) = isPowerOf2_S32(d) {
+                // k can be 31 only in the case that d is -2^31.
+                debug_assert!(k >= 1 && k <= 31);
+                let t1 = if k - 1 == 0 {
+                    n1
+                } else {
+                    pos.ins().sshr_imm(n1, (k - 1) as i64)
+                };
+                let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64);
+                let t3 = pos.ins().iadd(n1, t2);
+                if isRem {
+                    // S32 rem by a power-of-2
+                    let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64);
+                    // Curiously, we don't care here what the sign of d is.
+                    pos.func.dfg.replace(inst).isub(n1, t4);
+                } else {
+                    // S32 div by a power-of-2
+                    let t4 = pos.ins().sshr_imm(t3, k as i64);
+                    if isNeg {
+                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
+                    } else {
+                        pos.func.dfg.replace(inst).copy(t4);
+                    }
+                }
+            } else {
+                // S32 div, rem by a non-power-of-2
+                debug_assert!(d < -2 || d > 2);
+                let MS32 { mulBy, shiftBy } = magicS32(d);
+                let q0 = pos.ins().iconst(I32, mulBy as i64);
+                let q1 = pos.ins().smulhi(n1, q0);
+                let q2 = if d > 0 && mulBy < 0 {
+                    pos.ins().iadd(q1, n1)
+                } else if d < 0 && mulBy > 0 {
+                    pos.ins().isub(q1, n1)
+                } else {
+                    q1
+                };
+                debug_assert!(shiftBy >= 0 && shiftBy <= 31);
+                let q3 = if shiftBy == 0 {
+                    q2
+                } else {
+                    pos.ins().sshr_imm(q2, shiftBy as i64)
+                };
+                let t1 = pos.ins().ushr_imm(q3, 31);
+                let qf = pos.ins().iadd(q3, t1);
+                // Now qf holds the final quotient. If necessary calculate
+                // the remainder instead.
+                if isRem {
+                    let tt = pos.ins().imul_imm(qf, d as i64);
+                    pos.func.dfg.replace(inst).isub(n1, tt);
+                } else {
+                    pos.func.dfg.replace(inst).copy(qf);
+                }
+            }
+        }
+
+        // -------------------- S64 --------------------
+
+        // S64 div, rem by zero or -1: ignore
+        &DivRemByConstInfo::DivS64(_n1, -1) |
+        &DivRemByConstInfo::RemS64(_n1, -1) |
+        &DivRemByConstInfo::DivS64(_n1, 0) |
+        &DivRemByConstInfo::RemS64(_n1, 0) => {}
+
+        // S64 div by 1: identity
+        // S64 rem by 1: zero
+        &DivRemByConstInfo::DivS64(n1, 1) |
+        &DivRemByConstInfo::RemS64(n1, 1) => {
+            if isRem {
+                pos.func.dfg.replace(inst).iconst(I64, 0);
+            } else {
+                pos.func.dfg.replace(inst).copy(n1);
+            }
+        }
+
+        &DivRemByConstInfo::DivS64(n1, d) |
+        &DivRemByConstInfo::RemS64(n1, d) => {
+            if let Some((isNeg, k)) = isPowerOf2_S64(d) {
+                // k can be 63 only in the case that d is -2^63.
+                debug_assert!(k >= 1 && k <= 63);
+                let t1 = if k - 1 == 0 {
+                    n1
+                } else {
+                    pos.ins().sshr_imm(n1, (k - 1) as i64)
+                };
+                let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64);
+                let t3 = pos.ins().iadd(n1, t2);
+                if isRem {
+                    // S64 rem by a power-of-2
+                    let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k));
+                    // Curiously, we don't care here what the sign of d is.
+                    pos.func.dfg.replace(inst).isub(n1, t4);
+                } else {
+                    // S64 div by a power-of-2
+                    let t4 = pos.ins().sshr_imm(t3, k as i64);
+                    if isNeg {
+                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
+                    } else {
+                        pos.func.dfg.replace(inst).copy(t4);
+                    }
+                }
+            } else {
+                // S64 div, rem by a non-power-of-2
+                debug_assert!(d < -2 || d > 2);
+                let MS64 { mulBy, shiftBy } = magicS64(d);
+                let q0 = pos.ins().iconst(I64, mulBy);
+                let q1 = pos.ins().smulhi(n1, q0);
+                let q2 = if d > 0 && mulBy < 0 {
+                    pos.ins().iadd(q1, n1)
+                } else if d < 0 && mulBy > 0 {
+                    pos.ins().isub(q1, n1)
+                } else {
+                    q1
+                };
+                debug_assert!(shiftBy >= 0 && shiftBy <= 63);
+                let q3 = if shiftBy == 0 {
+                    q2
+                } else {
+                    pos.ins().sshr_imm(q2, shiftBy as i64)
+                };
+                let t1 = pos.ins().ushr_imm(q3, 63);
+                let qf = pos.ins().iadd(q3, t1);
+                // Now qf holds the final quotient. If necessary calculate
+                // the remainder instead.
+                if isRem {
+                    let tt = pos.ins().imul_imm(qf, d);
+                    pos.func.dfg.replace(inst).isub(n1, tt);
+                } else {
+                    pos.func.dfg.replace(inst).copy(qf);
+                }
+            }
+        }
+
+    }
+}
+
+
+//----------------------------------------------------------------------
+//
+// General pattern-match helpers.
+
+// Find out if `value` actually resolves to a constant, and if so what its
+// value is.
+fn get_const(value: Value, dfg: &DataFlowGraph) -> Option<i64> {
+    match dfg.value_def(value) {
+        ValueDef::Result(definingInst, resultNo) => {
+            let definingIData: &InstructionData = &dfg[definingInst];
+            if let &InstructionData::UnaryImm { opcode, imm } = definingIData {
+                if opcode == Opcode::Iconst && resultNo == 0 {
+                    return Some(imm.into());
+                }
+            }
+            None
+        }
+        ValueDef::Param(_definingEbb, _paramNo) => None,
+    }
+}
+
+
+//----------------------------------------------------------------------
+//
+// The main pre-opt pass.
+
+pub fn do_preopt(func: &mut Function) {
+    let _tt = timing::preopt();
+    let mut pos = FuncCursor::new(func);
+    while let Some(_ebb) = pos.next_ebb() {
+
+        while let Some(inst) = pos.next_inst() {
+
+            //-- BEGIN -- division by constants ----------------
+
+            let mb_dri = get_div_info(inst, &pos.func.dfg);
+            if let Some(divrem_info) = mb_dri {
+                do_divrem_transformation(&divrem_info, &mut pos, inst);
+                continue;
+            }
+
+            //-- END -- division by constants ------------------
+        }
+    }
+}
--- a/lib/cretonne/src/regalloc/affinity.rs
+++ b/lib/cretonne/src/regalloc/affinity.rs
@@ -13,7 +13,7 @@ use ir::{AbiParam, ArgumentLoc};
 use isa::{TargetIsa, RegInfo, RegClassIndex, OperandConstraint, ConstraintKind};

 /// Preferred register allocation for an SSA value.
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Debug)]
 pub enum Affinity {
    /// No affinity.
    ///
--- a/lib/cretonne/src/regalloc/coalescing.rs
+++ b/lib/cretonne/src/regalloc/coalescing.rs
@@ -1,9 +1,9 @@
-//! Constructing conventional SSA form.
+//! Constructing Conventional SSA form.
 //!
-//! Conventional SSA form is a subset of SSA form where any (transitively) phi-related values do
-//! not interfere. We construct CSSA by building virtual registers that are as large as possible
-//! and inserting copies where necessary such that all argument values passed to an EBB parameter
-//! will belong to the same virtual register as the EBB parameter value itself.
+//! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related
+//! values do not interfere. We construct CSSA by building virtual registers that are as large as
+//! possible and inserting copies where necessary such that all argument values passed to an EBB
+//! parameter will belong to the same virtual register as the EBB parameter value itself.

 use cursor::{Cursor, EncCursor};
 use dbg::DisplayList;
@@ -27,7 +27,7 @@ use timing;
 // The coalescing algorithm implemented follows this paper fairly closely:
 //
 //     Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and
-//     live-range identification (Vol. 37, pp. 25–32). ACM. http://doi.org/10.1145/543552.512534
+//     live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534
 //
 // We use a more efficient dominator forest representation (a linear stack) described here:
 //
@@ -104,7 +104,7 @@ impl Coalescing {
        self.backedges.clear();
    }

-    /// Convert `func` to conventional SSA form and build virtual registers in the process.
+    /// Convert `func` to Conventional SSA form and build virtual registers in the process.
    pub fn conventional_ssa(
        &mut self,
        isa: &TargetIsa,
@@ -239,7 +239,7 @@ impl<'a> Context<'a> {
                // 1. It is defined in a dominating EBB and live-in to `ebb`.
                // 2. If is itself a parameter value for `ebb`. This case should already have been
                //    eliminated by `isolate_conflicting_params()`.
-                assert!(
+                debug_assert!(
                    lr.def() != ebb.into(),
                    "{} parameter {} was missed by isolate_conflicting_params()",
                    ebb,
@@ -495,8 +495,8 @@ impl<'a> Context<'a> {
        // Second everything else in reverse layout order. Again, short forward branches get merged
        // first. There can also be backwards branches mixed in here, though, as long as they are
        // not loop backedges.
-        assert!(self.predecessors.is_empty());
-        assert!(self.backedges.is_empty());
+        debug_assert!(self.predecessors.is_empty());
+        debug_assert!(self.backedges.is_empty());
        for (pred_ebb, pred_inst) in self.cfg.pred_iter(ebb) {
            if self.preorder.dominates(ebb, pred_ebb) {
                self.backedges.push(pred_inst);
@@ -958,7 +958,8 @@ impl VirtualCopies {

    /// Indicate that `param` is now fully merged.
    pub fn merged_param(&mut self, param: Value, func: &Function) {
-        assert_eq!(self.params.pop(), Some(param));
+        let popped = self.params.pop();
+        debug_assert_eq!(popped, Some(param));

        // The domtree pre-order in `self.params` guarantees that all parameters defined at the
        // same EBB will be adjacent. This means we can see when all parameters at an EBB have been
--- a/lib/cretonne/src/regalloc/coloring.rs
+++ b/lib/cretonne/src/regalloc/coloring.rs
@@ -23,7 +23,7 @@
 //!    operands are allowed to read spilled values, but each such instance must be counted as using
 //!    a register.
 //!
-//! 5. The code must be in conventional SSA form. Among other things, this means that values passed
+//! 5. The code must be in Conventional SSA form. Among other things, this means that values passed
 //!    as arguments when branching to an EBB must belong to the same virtual register as the
 //!    corresponding EBB argument value.
 //!
@@ -246,7 +246,7 @@ impl<'a> Context<'a> {
    /// Return the set of remaining allocatable registers after filtering out the dead arguments.
    fn color_entry_params(&mut self, args: &[LiveValue]) -> AvailableRegs {
        let sig = &self.cur.func.signature;
-        assert_eq!(sig.params.len(), args.len());
+        debug_assert_eq!(sig.params.len(), args.len());

        let mut regs = AvailableRegs::new(&self.usable_regs);

@@ -271,7 +271,7 @@ impl<'a> Context<'a> {

                }
                // The spiller will have assigned an incoming stack slot already.
-                Affinity::Stack => assert!(abi.location.is_stack()),
+                Affinity::Stack => debug_assert!(abi.location.is_stack()),
                // This is a ghost value, unused in the function. Don't assign it to a location
                // either.
                Affinity::None => {}
@@ -340,7 +340,7 @@ impl<'a> Context<'a> {
            } else {
                // This is a multi-way branch like `br_table`. We only support arguments on
                // single-destination branches.
-                assert_eq!(
+                debug_assert_eq!(
                    self.cur.func.dfg.inst_variable_args(inst).len(),
                    0,
                    "Can't handle EBB arguments: {}",
@@ -586,7 +586,7 @@ impl<'a> Context<'a> {
        // Now handle the EBB arguments.
        let br_args = self.cur.func.dfg.inst_variable_args(inst);
        let dest_args = self.cur.func.dfg.ebb_params(dest);
-        assert_eq!(br_args.len(), dest_args.len());
+        debug_assert_eq!(br_args.len(), dest_args.len());
        for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) {
            // The first time we encounter a branch to `dest`, we get to pick the location. The
            // following times we see a branch to `dest`, we must follow suit.
@@ -631,7 +631,7 @@ impl<'a> Context<'a> {
    fn color_ebb_params(&mut self, inst: Inst, dest: Ebb) {
        let br_args = self.cur.func.dfg.inst_variable_args(inst);
        let dest_args = self.cur.func.dfg.ebb_params(dest);
-        assert_eq!(br_args.len(), dest_args.len());
+        debug_assert_eq!(br_args.len(), dest_args.len());
        for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) {
            match self.cur.func.locations[dest_arg] {
                ValueLoc::Unassigned => {
@@ -741,7 +741,7 @@ impl<'a> Context<'a> {
        // It's technically possible for a call instruction to have fixed results before the
        // variable list of results, but we have no known instances of that.
        // Just assume all results are variable return values.
-        assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len());
+        debug_assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len());
        for (i, lv) in defs.iter().enumerate() {
            let abi = self.cur.func.dfg.signatures[sig].returns[i];
            if let ArgumentLoc::Reg(reg) = abi.location {
@@ -787,7 +787,7 @@ impl<'a> Context<'a> {
            }

            let ok = self.solver.add_fixed_output(rc, reg);
-            assert!(ok, "Couldn't clear fixed output interference for {}", value);
+            debug_assert!(ok, "Couldn't clear fixed output interference for {}", value);
        }
        self.cur.func.locations[value] = ValueLoc::Reg(reg);
    }
@@ -858,11 +858,8 @@ impl<'a> Context<'a> {
                Ok(regs) => return regs,
                Err(SolverError::Divert(rc)) => {
                    // Do we have any live-through `rc` registers that are not already variables?
-                    assert!(
-                        self.try_add_var(rc, throughs),
-                        "Ran out of registers in {}",
-                        rc
-                    );
+                    let added = self.try_add_var(rc, throughs);
+                    debug_assert!(added, "Ran out of registers in {}", rc);
                }
                Err(SolverError::Global(value)) => {
                    dbg!("Not enough global registers for {}, trying as local", value);
@@ -908,7 +905,7 @@ impl<'a> Context<'a> {

        let inst = self.cur.current_inst().expect("Not on an instruction");
        let ctx = self.liveness.context(&self.cur.func.layout);
-        match self.cur.func.dfg[inst].analyze_branch(&self.cur.func.dfg.value_lists) {
+        match self.cur.func.dfg.analyze_branch(inst) {
            NotABranch => false,
            SingleDest(ebb, _) => {
                let lr = &self.liveness[value];
@@ -941,7 +938,7 @@ impl<'a> Context<'a> {
        // It is very unlikely (impossible?) that we would need more than one spill per top-level
        // register class, so avoid allocation by using a fixed array here.
        let mut slot = [PackedOption::default(); 8];
-        assert!(spills <= slot.len(), "Too many spills ({})", spills);
+        debug_assert!(spills <= slot.len(), "Too many spills ({})", spills);

        for m in self.solver.moves() {
            match *m {
--- a/lib/cretonne/src/regalloc/context.rs
+++ b/lib/cretonne/src/regalloc/context.rs
@@ -90,7 +90,7 @@ impl Context {
            verify_liveness(isa, func, cfg, &self.liveness)?;
        }

-        // Pass: Coalesce and create conventional SSA form.
+        // Pass: Coalesce and create Conventional SSA form.
        self.coalescing.conventional_ssa(
            isa,
            func,
--- a/lib/cretonne/src/regalloc/live_value_tracker.rs
+++ b/lib/cretonne/src/regalloc/live_value_tracker.rs
@@ -208,7 +208,7 @@ impl LiveValueTracker {
        let first_arg = self.live.values.len();
        for &value in dfg.ebb_params(ebb) {
            let lr = &liveness[value];
-            assert_eq!(lr.def(), ebb.into());
+            debug_assert_eq!(lr.def(), ebb.into());
            match lr.def_local_end().into() {
                ExpandedProgramPoint::Inst(endpoint) => {
                    self.live.push(value, endpoint, lr);
@@ -216,7 +216,7 @@ impl LiveValueTracker {
                ExpandedProgramPoint::Ebb(local_ebb) => {
                    // This is a dead EBB parameter which is not even live into the first
                    // instruction in the EBB.
-                    assert_eq!(
+                    debug_assert_eq!(
                        local_ebb,
                        ebb,
                        "EBB parameter live range ends at wrong EBB header"
@@ -261,7 +261,7 @@ impl LiveValueTracker {
    ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
        // Save a copy of the live values before any branches or jumps that could be somebody's
        // immediate dominator.
-        match dfg[inst].analyze_branch(&dfg.value_lists) {
+        match dfg.analyze_branch(inst) {
            BranchInfo::NotABranch => {}
            _ => self.save_idom_live_set(inst),
        }
@@ -274,7 +274,7 @@ impl LiveValueTracker {
        let first_def = self.live.values.len();
        for &value in dfg.inst_results(inst) {
            let lr = &liveness[value];
-            assert_eq!(lr.def(), inst.into());
+            debug_assert_eq!(lr.def(), inst.into());
            match lr.def_local_end().into() {
                ExpandedProgramPoint::Inst(endpoint) => {
                    self.live.push(value, endpoint, lr);
--- a/lib/cretonne/src/regalloc/liveness.rs
+++ b/lib/cretonne/src/regalloc/liveness.rs
@@ -252,7 +252,7 @@ fn extend_to_use(
    forest: &mut LiveRangeForest,
 ) {
    // This is our scratch working space, and we'll leave it empty when we return.
-    assert!(worklist.is_empty());
+    debug_assert!(worklist.is_empty());

    // Extend the range locally in `ebb`.
    // If there already was a live interval in that block, we're done.
@@ -339,7 +339,7 @@ impl Liveness {
        let old = self.ranges.insert(
            LiveRange::new(value, def.into(), affinity),
        );
-        assert!(old.is_none(), "{} already has a live range", value);
+        debug_assert!(old.is_none(), "{} already has a live range", value);
    }

    /// Move the definition of `value` to `def`.
@@ -368,7 +368,7 @@ impl Liveness {
        debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
        let lr = self.ranges.get_mut(value).expect("Value has no live range");
        let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
-        assert!(!livein, "{} should already be live in {}", value, ebb);
+        debug_assert!(!livein, "{} should already be live in {}", value, ebb);
        &mut lr.affinity
    }

--- a/lib/cretonne/src/regalloc/liverange.rs
+++ b/lib/cretonne/src/regalloc/liverange.rs
@@ -253,7 +253,7 @@ impl<PO: ProgramOrder> GenLiveRange<PO> {
            order.cmp(to, self.def_begin) != Ordering::Less
        {
            let to_pp = to.into();
-            assert_ne!(
+            debug_assert_ne!(
                to_pp,
                self.def_begin,
                "Can't use value in the defining instruction."
--- a/lib/cretonne/src/regalloc/reload.rs
+++ b/lib/cretonne/src/regalloc/reload.rs
@@ -146,7 +146,7 @@ impl<'a> Context<'a> {
        );

        if self.cur.func.layout.entry_block() == Some(ebb) {
-            assert_eq!(liveins.len(), 0);
+            debug_assert_eq!(liveins.len(), 0);
            self.visit_entry_params(ebb, args);
        } else {
            self.visit_ebb_params(ebb, args);
@@ -156,7 +156,7 @@ impl<'a> Context<'a> {
    /// Visit the parameters on the entry block.
    /// These values have ABI constraints from the function signature.
    fn visit_entry_params(&mut self, ebb: Ebb, args: &[LiveValue]) {
-        assert_eq!(self.cur.func.signature.params.len(), args.len());
+        debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
        self.cur.goto_first_inst(ebb);

        for (arg_idx, arg) in args.iter().enumerate() {
@@ -176,7 +176,7 @@ impl<'a> Context<'a> {
                    }
                }
                ArgumentLoc::Stack(_) => {
-                    assert!(arg.affinity.is_stack());
+                    debug_assert!(arg.affinity.is_stack());
                }
                ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
            }
@@ -204,7 +204,7 @@ impl<'a> Context<'a> {
        );

        // Identify reload candidates.
-        assert!(self.candidates.is_empty());
+        debug_assert!(self.candidates.is_empty());
        self.find_candidates(inst, constraints);

        // Insert fill instructions before `inst` and replace `cand.value` with the filled value.
@@ -299,7 +299,7 @@ impl<'a> Context<'a> {
        }
    }

-    // Find reload candidates for `inst` and add them to `self.condidates`.
+    // Find reload candidates for `inst` and add them to `self.candidates`.
    //
    // These are uses of spilled values where the operand constraint requires a register.
    fn find_candidates(&mut self, inst: Inst, constraints: &RecipeConstraints) {
@@ -376,7 +376,7 @@ fn handle_abi_args(
    isa: &TargetIsa,
    liveness: &Liveness,
 ) {
-    assert_eq!(abi_types.len(), var_args.len());
+    debug_assert_eq!(abi_types.len(), var_args.len());
    for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
        if abi.location.is_reg() {
            let lv = liveness.get(arg).expect("Missing live range for ABI arg");
--- a/lib/cretonne/src/regalloc/solver.rs
+++ b/lib/cretonne/src/regalloc/solver.rs
@@ -566,7 +566,7 @@ impl Solver {
                dbg!("-> converting variable {} to a fixed constraint", v);
                // The spiller is responsible for ensuring that all constraints on the uses of a
                // value are compatible.
-                assert!(
+                debug_assert!(
                    v.constraint.contains(to),
                    "Incompatible constraints for {}",
                    value
@@ -666,7 +666,7 @@ impl Solver {
            // No variable, then it must be a fixed reassignment.
            if let Some(a) = self.assignments.get(value) {
                dbg!("-> already fixed assignment {}", a);
-                assert!(
+                debug_assert!(
                    constraint.contains(a.to),
                    "Incompatible constraints for {}",
                    value
@@ -709,7 +709,7 @@ impl Solver {
    /// Call this method to indicate that there will be no more fixed input reassignments added
    /// and prepare for the output side constraints.
    pub fn inputs_done(&mut self) {
-        assert!(!self.has_fixed_input_conflicts());
+        debug_assert!(!self.has_fixed_input_conflicts());

        // At this point, `regs_out` contains the `to` side of the input reassignments, and the
        // `from` side has already been marked as available in `regs_in`.
@@ -747,7 +747,7 @@ impl Solver {
        // interference constraints on the output side.
        // Variables representing tied operands will get their `is_output` flag set again later.
        if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
-            assert!(v.is_input);
+            debug_assert!(v.is_input);
            v.is_output = false;
            return;
        }
@@ -783,7 +783,7 @@ impl Solver {

        // Check if a variable was created.
        if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
-            assert!(v.is_input);
+            debug_assert!(v.is_input);
            v.is_output = true;
            v.is_global = is_global;
            return None;
@@ -1027,7 +1027,7 @@ impl Solver {
    /// Returns the number of spills that had to be emitted.
    pub fn schedule_moves(&mut self, regs: &AllocatableSet) -> usize {
        self.collect_moves();
-        assert!(self.fills.is_empty());
+        debug_assert!(self.fills.is_empty());

        let mut num_spill_slots = 0;
        let mut avail = regs.clone();
--- a/lib/cretonne/src/regalloc/spilling.rs
+++ b/lib/cretonne/src/regalloc/spilling.rs
@@ -243,7 +243,7 @@ impl<'a> Context<'a> {
        debug_assert_eq!(self.cur.current_ebb(), Some(ebb));

        // We may need to resolve register constraints if there are any noteworthy uses.
-        assert!(self.reg_uses.is_empty());
+        debug_assert!(self.reg_uses.is_empty());
        self.collect_reg_uses(inst, ebb, constraints);

        // Calls usually have fixed register uses.
--- a/lib/cretonne/src/regalloc/virtregs.rs
+++ b/lib/cretonne/src/regalloc/virtregs.rs
@@ -141,7 +141,7 @@ impl VirtRegs {
        func: &Function,
        preorder: &DominatorTreePreorder,
    ) -> VirtReg {
-        assert_eq!(self.get(single), None, "Expected singleton {}", single);
+        debug_assert_eq!(self.get(single), None, "Expected singleton {}", single);

        // Make sure `big` has a vreg.
        let vreg = self.get(big).unwrap_or_else(|| {
@@ -209,7 +209,7 @@ impl VirtRegs {
            }
        }

-        assert_eq!(
+        debug_assert_eq!(
            values.len(),
            singletons + cleared,
            "Can't unify partial virtual registers"
--- a/Show More
+++ b/Show More