Merge remote-tracking branch 'origin/master' into no_std

Dan Gohman
2018-03-12 12:55:57 -07:00
138 changed files with 3795 additions and 1168 deletions

View File

@@ -1,12 +1,13 @@
[package]
authors = ["The Cretonne Project Developers"]
name = "cretonne"
version = "0.1.0"
version = "0.3.4"
description = "Low-level code generator library"
license = "Apache-2.0"
documentation = "https://cretonne.readthedocs.io/"
repository = "https://github.com/Cretonne/cretonne"
readme = "README.md"
keywords = [ "compile", "compiler", "jit" ]
build = "build.rs"
[lib]

View File

@@ -46,7 +46,7 @@ fn main() {
let cur_dir = env::current_dir().expect("Can't access current working directory");
let crate_dir = cur_dir.as_path();
// Make sure we rebuild is this build script changes.
// Make sure we rebuild if this build script changes.
// I guess that won't happen if you have non-UTF8 bytes in your path names.
// The `build.py` script prints out its own dependencies.
println!(
@@ -59,8 +59,11 @@ fn main() {
let build_script = meta_dir.join("build.py");
// Launch build script with Python. We'll just find python in the path.
// Use -B to disable .pyc files, because they cause trouble for vendoring
// scripts, and this is a build step that isn't run very often anyway.
let status = process::Command::new("python")
.current_dir(crate_dir)
.arg("-B")
.arg(build_script)
.arg("--out-dir")
.arg(out_dir)

View File

@@ -833,6 +833,26 @@ imul = Instruction(
""",
ins=(x, y), outs=a)
umulhi = Instruction(
'umulhi', r"""
Unsigned integer multiplication, producing the high half of a
double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
""",
ins=(x, y), outs=a)
smulhi = Instruction(
'smulhi', r"""
Signed integer multiplication, producing the high half of a
double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
""",
ins=(x, y), outs=a)
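The two new instructions return only the upper half of the full double-width product. A minimal Rust sketch of the intended semantics for 64-bit operands (the helper names are illustrative and not part of Cretonne):

// umulhi/smulhi sketched with 128-bit intermediates: widen, multiply, keep the
// top 64 bits. The signed variant relies on sign extension of the operands.
fn umulhi64(x: u64, y: u64) -> u64 {
    ((x as u128 * y as u128) >> 64) as u64
}

fn smulhi64(x: i64, y: i64) -> i64 {
    ((x as i128 * y as i128) >> 64) as i64
}

fn main() {
    assert_eq!(umulhi64(u64::max_value(), 2), 1); // 2 * (2^64 - 1) = 2^65 - 2
    assert_eq!(smulhi64(-1, 2), -1);              // -2: the high half is all ones
}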
udiv = Instruction(
'udiv', r"""
Unsigned integer division: :math:`a := \lfloor {x \over y} \rfloor`.
@@ -1679,7 +1699,7 @@ fpromote = Instruction(
This is an exact operation.
Cretonne currently only supports two floating point formats
- :type:`f32` and :type:`f64`. This may change in the future.
- :type:`f32` and :type:`f64`. This may change in the future.
The result type must have the same number of vector lanes as the input,
and the result lanes must not have fewer bits than the input lanes. If
@@ -1695,10 +1715,10 @@ fdemote = Instruction(
by rounding to nearest, ties to even.
Cretonne currently only supports two floating point formats
- :type:`f32` and :type:`f64`. This may change in the future.
- :type:`f32` and :type:`f64`. This may change in the future.
The result type must have the same number of vector lanes as the input,
and the result lanes must not have more bits than the input lanes. If
and the result lanes must not have more bits than the input lanes. If
the input and output types are the same, this is a no-op.
""",
ins=x, outs=a, constraints=WiderOrEq(Float, FloatTo))

View File

@@ -56,7 +56,11 @@ avoid_div_traps = BoolSetting(
is_compressed = BoolSetting("Enable compressed instructions")
enable_float = BoolSetting(
"""Enable the use of floating-point instructions""",
"""
Enable the use of floating-point instructions
Disabling use of floating-point instructions is not yet implemented.
""",
default=True)
enable_simd = BoolSetting(

View File

@@ -96,7 +96,7 @@ def check_concrete_typing_rtl(var_types, rtl):
# type: (VarTyping, Rtl) -> None
"""
Check that a concrete type assignment var_types (Dict[Var, TypeVar]) is
valid for an Rtl rtl. Specifically check that:
valid for an Rtl rtl. Specifically check that:
1) For each Var v \in rtl, v is defined in var_types

View File

@@ -322,7 +322,7 @@ class TypeEnv(object):
# type: (TypeVar, TypeVar) -> None
"""
Record a that the free tv1 is part of the same equivalence class as
tv2. The canonical representative of the merged class is tv2's
tv2. The canonical representative of the merged class is tv2's
canonical representative.
"""
assert not tv1.is_derived
@@ -364,9 +364,9 @@ class TypeEnv(object):
# type: (TypeVar) -> int
"""
Get the rank of tv in the partial order. TVs directly associated with a
Var get their rank from the Var (see register()). Internally generated
Var get their rank from the Var (see register()). Internally generated
non-derived TVs implicitly get the lowest rank (0). Derived variables
get their rank from their free typevar. Singletons have the highest
get their rank from their free typevar. Singletons have the highest
rank. TVs associated with vars in a source pattern have a higher rank
than TVs associated with temporary vars.
"""
@@ -381,7 +381,7 @@ class TypeEnv(object):
def register(self, v):
# type: (Var) -> None
"""
Register a new Var v. This computes a rank for the associated TypeVar
Register a new Var v. This computes a rank for the associated TypeVar
for v, which is used to impose a partial order on type variables.
"""
self.vars.add(v)
@@ -848,7 +848,7 @@ def ti_def(definition, typ):
def ti_rtl(rtl, typ):
# type: (Rtl, TypeEnv) -> TypingOrError
"""
Perform type inference on an Rtl in a starting type env typ. Return an
Perform type inference on an Rtl in a starting type env typ. Return an
updated type environment or error.
"""
for (i, d) in enumerate(rtl.rtl):
@@ -866,7 +866,7 @@ def ti_rtl(rtl, typ):
def ti_xform(xform, typ):
# type: (XForm, TypeEnv) -> TypingOrError
"""
Perform type inference on an Rtl in a starting type env typ. Return an
Perform type inference on an Rtl in a starting type env typ. Return an
updated type environment or error.
"""
typ_or_err = ti_rtl(xform.src, typ)

View File

@@ -113,8 +113,8 @@ class Rtl(object):
# type: (Rtl) -> None
"""
Given that there is only 1 possible concrete typing T for self, assign
a singleton TV with type t=T[v] for each Var v \in self. Its an error
to call this on an Rtl with more than 1 possible typing. This modifies
a singleton TV with type t=T[v] for each Var v \in self. Its an error
to call this on an Rtl with more than 1 possible typing. This modifies
the Rtl in-place.
"""
from .ti import ti_rtl, TypeEnv

View File

@@ -1,5 +1,5 @@
#!/bin/bash
set -e
set -euo pipefail
cd $(dirname "$0")
runif() {

View File

@@ -211,7 +211,7 @@ def gen_instruction_data_impl(fmt):
if f.has_value_list:
fmt.line(n + ' { ref mut args, .. } => args,')
fmt.line('_ => panic!("No value list: {:?}", self),')
fmt.line('assert!(args.is_empty(), "Value list already in use");')
fmt.line('debug_assert!(args.is_empty(), "Value list already in use");')
fmt.line('*args = vlist;')

View File

@@ -211,7 +211,7 @@ def unwrap_inst(iref, node, fmt):
fmt.format('let typeof_{0} = pos.func.dfg.value_type({0});', v)
# If the node has results, detach the values.
# Place the values in locals.
# Place the values in locals.
replace_inst = False
if len(node.defs) > 0:
if node.defs == node.defs[0].dst_def.defs:
@@ -348,7 +348,8 @@ def gen_xform(xform, fmt, type_sets):
# Delete the original instruction if we didn't have an opportunity to
# replace it.
if not replace_inst:
fmt.line('assert_eq!(pos.remove_inst(), inst);')
fmt.line('let removed = pos.remove_inst();')
fmt.line('debug_assert_eq!(removed, inst);')
fmt.line('return true;')
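The split of `assert_eq!(pos.remove_inst(), inst)` into a plain call followed by `debug_assert_eq!` is not just stylistic: `debug_assert*` compiles to nothing in release builds, so any side effect inside its arguments would vanish with it. A small stand-alone sketch of the hazard (not Cretonne code):

fn remove_item(v: &mut Vec<u32>) -> u32 {
    v.pop().expect("non-empty")
}

fn main() {
    let mut v = vec![1, 2, 3];
    // Wrong: in a release build the pop() inside debug_assert_eq! never runs.
    // debug_assert_eq!(remove_item(&mut v), 3);

    // Right: do the work unconditionally, check the result only in debug builds.
    let removed = remove_item(&mut v);
    debug_assert_eq!(removed, 3);
    assert_eq!(v.len(), 2);
}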

View File

@@ -245,7 +245,7 @@ def gen_constructor(sgrp, parent, fmt):
'pub fn new({}) -> Flags {{'.format(args), '}'):
fmt.line('let bvec = builder.state_for("{}");'.format(sgrp.name))
fmt.line('let mut bytes = [0; {}];'.format(sgrp.byte_size()))
fmt.line('assert_eq!(bvec.len(), {});'.format(sgrp.settings_size))
fmt.line('debug_assert_eq!(bvec.len(), {});'.format(sgrp.settings_size))
with fmt.indented(
'for (i, b) in bvec.iter().enumerate() {', '}'):
fmt.line('bytes[i] = *b;')

View File

@@ -120,6 +120,9 @@ enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf)
enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7)
enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)
enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5)
enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4)
enc_i32_i64(base.copy, r.umr, 0x89)
enc_both(base.copy.b1, r.umr, 0x89)
enc_i32_i64(base.regmove, r.rmov, 0x89)
@@ -403,9 +406,55 @@ I64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
# Numerical conversions.
# Converting i64 to i32 is a no-op in 64-bit mode.
# Reducing an integer is a no-op.
I32.enc(base.ireduce.i8.i32, r.null, 0)
I32.enc(base.ireduce.i16.i32, r.null, 0)
I64.enc(base.ireduce.i8.i32, r.null, 0)
I64.enc(base.ireduce.i16.i32, r.null, 0)
I64.enc(base.ireduce.i8.i64, r.null, 0)
I64.enc(base.ireduce.i16.i64, r.null, 0)
I64.enc(base.ireduce.i32.i64, r.null, 0)
# TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
# instructions for %al/%ax/%eax to %ax/%eax/%rax.
# movsbl
I32.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
I64.enc(base.sextend.i32.i8, *r.urm.rex(0x0f, 0xbe))
I64.enc(base.sextend.i32.i8, *r.urm(0x0f, 0xbe))
# movswl
I32.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
I64.enc(base.sextend.i32.i16, *r.urm.rex(0x0f, 0xbf))
I64.enc(base.sextend.i32.i16, *r.urm(0x0f, 0xbf))
# movsbq
I64.enc(base.sextend.i64.i8, *r.urm.rex(0x0f, 0xbe, w=1))
# movswq
I64.enc(base.sextend.i64.i16, *r.urm.rex(0x0f, 0xbf, w=1))
# movslq
I64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1))
# movzbl
I32.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
I64.enc(base.uextend.i32.i8, *r.urm.rex(0x0f, 0xb6))
I64.enc(base.uextend.i32.i8, *r.urm(0x0f, 0xb6))
# movzwl
I32.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
I64.enc(base.uextend.i32.i16, *r.urm.rex(0x0f, 0xb7))
I64.enc(base.uextend.i32.i16, *r.urm(0x0f, 0xb7))
# movzbq, encoded as movzbl because it's equivalent and shorter
I64.enc(base.uextend.i64.i8, *r.urm.rex(0x0f, 0xb6))
I64.enc(base.uextend.i64.i8, *r.urm(0x0f, 0xb6))
# movzwq, encoded as movzwl because it's equivalent and shorter
I64.enc(base.uextend.i64.i16, *r.urm.rex(0x0f, 0xb7))
I64.enc(base.uextend.i64.i16, *r.urm(0x0f, 0xb7))
# A 32-bit register copy clears the high 32 bits.
I64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
I64.enc(base.uextend.i64.i32, *r.umr(0x89))

View File

@@ -47,6 +47,28 @@ sdivmodx = Instruction(
""",
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
argL = Operand('argL', iWord)
argR = Operand('argR', iWord)
resLo = Operand('resLo', iWord)
resHi = Operand('resHi', iWord)
umulx = Instruction(
'x86_umulx', r"""
Unsigned integer multiplication, producing a double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
""",
ins=(argL, argR), outs=(resLo, resHi))
smulx = Instruction(
'x86_smulx', r"""
Signed integer multiplication, producing a double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
""",
ins=(argL, argR), outs=(resLo, resHi))
Float = TypeVar(
'Float', 'A scalar or vector floating point number',
@@ -132,7 +154,7 @@ rflags = Operand('rflags', iflags)
bsr = Instruction(
'x86_bsr', r"""
Bit Scan Reverse -- returns the bit-index of the most significant 1
in the word. Result is undefined if the argument is zero. However, it
in the word. Result is undefined if the argument is zero. However, it
sets the Z flag depending on the argument, so it is at least easy to
detect and handle that case.
@@ -144,7 +166,7 @@ bsr = Instruction(
bsf = Instruction(
'x86_bsf', r"""
Bit Scan Forwards -- returns the bit-index of the least significant 1
in the word. Is otherwise identical to 'bsr', just above.
in the word. Is otherwise identical to 'bsr', just above.
""",
ins=x, outs=(y, rflags))

View File

@@ -37,6 +37,23 @@ intel_expand.custom_legalize(insts.srem, 'expand_sdivrem')
intel_expand.custom_legalize(insts.udiv, 'expand_udivrem')
intel_expand.custom_legalize(insts.urem, 'expand_udivrem')
#
# Double length (widening) multiplication
#
resLo = Var('resLo')
resHi = Var('resHi')
intel_expand.legalize(
resHi << insts.umulhi(x, y),
Rtl(
(resLo, resHi) << x86.umulx(x, y)
))
intel_expand.legalize(
resHi << insts.smulhi(x, y),
Rtl(
(resLo, resHi) << x86.smulx(x, y)
))
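These rules lower the new portable umulhi/smulhi instructions onto the x86 umulx/smulx defined earlier: both halves of the product are produced, the high one is bound to the pattern's output, and the low half is simply left unused. A tiny sketch of the property relied upon (the helper name is illustrative, not a Cretonne API):

// The full-width product's low half is the ordinary wrapping multiply and its
// high half is exactly what umulhi returns, so discarding resLo is sound.
fn umulx32(x: u32, y: u32) -> (u32, u32) {
    let wide = (x as u64) * (y as u64);
    (wide as u32, (wide >> 32) as u32) // (resLo, resHi)
}

fn main() {
    let (lo, hi) = umulx32(0x8000_0000, 2);
    assert_eq!((lo, hi), (0, 1));                   // 2^31 * 2 = 2^32
    assert_eq!(lo, 0x8000_0000u32.wrapping_mul(2)); // low half == wrapping product
}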
# Floating point condition codes.
#
# The 8 condition codes in `supported_floatccs` are directly supported by a

View File

@@ -453,6 +453,15 @@ div = TailRecipe(
modrm_r_bits(in_reg2, bits, sink);
''')
# XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo)
mulx = TailRecipe(
'mulx', Binary, size=1,
ins=(GPR.rax, GPR), outs=(GPR.rax, GPR.rdx),
emit='''
PUT_OP(bits, rex1(in_reg1), sink);
modrm_r_bits(in_reg1, bits, sink);
''')
# XX /n ib with 8-bit immediate sign-extended.
rib = TailRecipe(
'rib', BinaryImm, size=2, ins=GPR, outs=0,
@@ -675,7 +684,7 @@ st_abcd = TailRecipe(
# XX /r register-indirect store of FPR with no offset.
fst = TailRecipe(
'fst', Store, size=1, ins=(FPR, GPR), outs=(),
'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(),
instp=IsEqual(Store.offset, 0),
clobbers_flags=False,
emit='''

View File

@@ -11,9 +11,6 @@ ISA.settings = SettingGroup('intel', parent=shared.group)
# The has_* settings here correspond to CPUID bits.
# CPUID.01H:EDX
has_sse2 = BoolSetting("SSE2: CPUID.01H:EDX.SSE2[bit 26]")
# CPUID.01H:ECX
has_sse3 = BoolSetting("SSE3: CPUID.01H:ECX.SSE3[bit 0]")
has_ssse3 = BoolSetting("SSSE3: CPUID.01H:ECX.SSSE3[bit 9]")
@@ -40,9 +37,9 @@ use_lzcnt = And(has_lzcnt)
# Presets corresponding to Intel CPUs.
baseline = Preset(has_sse2)
baseline = Preset()
nehalem = Preset(
has_sse2, has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt)
has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt)
haswell = Preset(nehalem, has_bmi1, has_lzcnt)
ISA.settings.close(globals())

View File

@@ -2,7 +2,7 @@
RISC-V Target
-------------
`RISC-V <http://riscv.org/>`_ is an open instruction set architecture
`RISC-V <https://riscv.org/>`_ is an open instruction set architecture
originally developed at UC Berkeley. It is a RISC-style ISA with either a
32-bit (RV32I) or 64-bit (RV64I) base instruction set and a number of optional
extensions:

View File

@@ -17,7 +17,7 @@ def verify_semantics(inst, src, xforms):
# type: (Instruction, Rtl, InstructionSemantics) -> None
"""
Verify that the semantics transforms in xforms correctly describe the
instruction described by the src Rtl. This involves checking that:
instruction described by the src Rtl. This involves checking that:
0) src is a single instance of inst
1) For all x\in xforms x.src is a single instance of inst
2) For any concrete values V of Literals in inst:

View File

@@ -12,7 +12,7 @@ use std::vec::Vec;
///
/// An argument may go through a sequence of legalization steps before it reaches the final
/// `Assign` action.
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Debug)]
pub enum ArgAction {
/// Assign the argument to the given location.
Assign(ArgumentLoc),
@@ -151,7 +151,7 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
match have_bits.cmp(&arg_bits) {
// We have fewer bits than the ABI argument.
Ordering::Less => {
assert!(
debug_assert!(
have.is_int() && arg.value_type.is_int(),
"Can only extend integer values"
);
@@ -164,8 +164,8 @@ pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
// We have the same number of bits as the argument.
Ordering::Equal => {
// This must be an integer vector that is split and then extended.
assert!(arg.value_type.is_int());
assert!(have.is_vector());
debug_assert!(arg.value_type.is_int());
debug_assert!(have.is_vector());
ValueConversion::VectorSplit
}
// We have more bits than the argument.

View File

@@ -54,8 +54,8 @@ impl<F: Forest> Clone for NodeData<F> {
impl<F: Forest> NodeData<F> {
/// Is this a free/unused node?
pub fn is_free(&self) -> bool {
match self {
&NodeData::Free { .. } => true,
match *self {
NodeData::Free { .. } => true,
_ => false,
}
}
@@ -65,10 +65,10 @@ impl<F: Forest> NodeData<F> {
/// This is the number of outgoing edges in an inner node, or the number of key-value pairs in
/// a leaf node.
pub fn entries(&self) -> usize {
match self {
&NodeData::Inner { size, .. } => usize::from(size) + 1,
&NodeData::Leaf { size, .. } => usize::from(size),
&NodeData::Free { .. } => panic!("freed node"),
match *self {
NodeData::Inner { size, .. } => usize::from(size) + 1,
NodeData::Leaf { size, .. } => usize::from(size),
NodeData::Free { .. } => panic!("freed node"),
}
}
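The pervasive change in this file swaps `match self` with `&`-patterns on every arm for a single dereference in the scrutinee, which is the more idiomatic form. A tiny stand-alone illustration (not Cretonne code):

enum Node {
    Leaf { size: u8 },
    Free,
}

impl Node {
    fn entries(&self) -> usize {
        // Before: match self { &Node::Leaf { size } => ..., &Node::Free => ... }
        match *self {
            Node::Leaf { size } => usize::from(size),
            Node::Free => panic!("freed node"),
        }
    }
}

fn main() {
    assert_eq!(Node::Leaf { size: 3 }.entries(), 3);
}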
@@ -96,8 +96,8 @@ impl<F: Forest> NodeData<F> {
/// Unwrap an inner node into two slices (keys, trees).
pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) {
match self {
&NodeData::Inner {
match *self {
NodeData::Inner {
size,
ref keys,
ref tree,
@@ -113,8 +113,8 @@ impl<F: Forest> NodeData<F> {
/// Unwrap a leaf node into two slices (keys, values) of the same length.
pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) {
match self {
&NodeData::Leaf {
match *self {
NodeData::Leaf {
size,
ref keys,
ref vals,
@@ -132,8 +132,8 @@ impl<F: Forest> NodeData<F> {
/// Unwrap a mutable leaf node into two slices (keys, values) of the same length.
pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) {
match self {
&mut NodeData::Leaf {
match *self {
NodeData::Leaf {
size,
ref mut keys,
ref mut vals,
@@ -152,8 +152,8 @@ impl<F: Forest> NodeData<F> {
/// Get the critical key for a leaf node.
/// This is simply the first key.
pub fn leaf_crit_key(&self) -> F::Key {
match self {
&NodeData::Leaf { size, ref keys, .. } => {
match *self {
NodeData::Leaf { size, ref keys, .. } => {
debug_assert!(size > 0, "Empty leaf node");
keys.borrow()[0]
}
@@ -165,8 +165,8 @@ impl<F: Forest> NodeData<F> {
/// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`.
/// If the node is full, this leaves the node unchanged and returns false.
pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool {
match self {
&mut NodeData::Inner {
match *self {
NodeData::Inner {
ref mut size,
ref mut keys,
ref mut tree,
@@ -191,8 +191,8 @@ impl<F: Forest> NodeData<F> {
/// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node
/// is full.
pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool {
match self {
&mut NodeData::Leaf {
match *self {
NodeData::Leaf {
ref mut size,
ref mut keys,
ref mut vals,
@@ -222,8 +222,8 @@ impl<F: Forest> NodeData<F> {
/// The `insert_index` parameter is the position where an insertion was tried and failed. The
/// node will be split in half with a bias towards an even split after the insertion is retried.
pub fn split(&mut self, insert_index: usize) -> SplitOff<F> {
match self {
&mut NodeData::Inner {
match *self {
NodeData::Inner {
ref mut size,
ref keys,
ref tree,
@@ -262,7 +262,7 @@ impl<F: Forest> NodeData<F> {
},
}
}
&mut NodeData::Leaf {
NodeData::Leaf {
ref mut size,
ref keys,
ref vals,
@@ -307,8 +307,8 @@ impl<F: Forest> NodeData<F> {
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn inner_remove(&mut self, index: usize) -> Removed {
match self {
&mut NodeData::Inner {
match *self {
NodeData::Inner {
ref mut size,
ref mut keys,
ref mut tree,
@@ -332,8 +332,8 @@ impl<F: Forest> NodeData<F> {
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn leaf_remove(&mut self, index: usize) -> Removed {
match self {
&mut NodeData::Leaf {
match *self {
NodeData::Leaf {
ref mut size,
ref mut keys,
ref mut vals,
@@ -553,15 +553,15 @@ where
F::Value: ValDisp,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
&NodeData::Inner { size, keys, tree } => {
match *self {
NodeData::Inner { size, keys, tree } => {
write!(f, "[ {}", tree[0])?;
for i in 0..usize::from(size) {
write!(f, " {} {}", keys[i], tree[i + 1])?;
}
write!(f, " ]")
}
&NodeData::Leaf { size, keys, vals } => {
NodeData::Leaf { size, keys, vals } => {
let keys = keys.borrow();
let vals = vals.borrow();
write!(f, "[")?;
@@ -571,8 +571,8 @@ where
}
write!(f, " ]")
}
&NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n),
&NodeData::Free { next: None } => write!(f, "[ free ]"),
NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n),
NodeData::Free { next: None } => write!(f, "[ free ]"),
}
}
}

View File

@@ -285,7 +285,7 @@ impl<F: Forest> Path<F> {
fn split_and_insert(&mut self, mut key: F::Key, value: F::Value, pool: &mut NodePool<F>) {
let orig_root = self.node[0];
// Loop invariant: We need to split the node at `level` and then retry a failed insertion.
// Loop invariant: We need to split the node at `level` and then retry a failed insertion.
// The items to insert are either `(key, ins_node)` or `(key, value)`.
let mut ins_node = None;
let mut split;
@@ -316,7 +316,8 @@ impl<F: Forest> Path<F> {
// Now that we have a not-full node, it must be possible to insert.
match ins_node {
None => {
assert!(pool[node].try_leaf_insert(entry, key, value));
let inserted = pool[node].try_leaf_insert(entry, key, value);
debug_assert!(inserted);
// If we inserted at the front of the new rhs_node leaf, we need to propagate
// the inserted key as the critical key instead of the previous front key.
if entry == 0 && node == rhs_node {
@@ -324,7 +325,8 @@ impl<F: Forest> Path<F> {
}
}
Some(n) => {
assert!(pool[node].try_inner_insert(entry, key, n));
let inserted = pool[node].try_inner_insert(entry, key, n);
debug_assert!(inserted);
// The lower level was moved to the new RHS node, so make sure that is
// reflected here.
if n == self.node[level + 1] {

View File

@@ -110,7 +110,7 @@ where
let mut divert = RegDiversions::new();
for ebb in func.layout.ebbs() {
divert.clear();
assert_eq!(func.offsets[ebb], sink.offset());
debug_assert_eq!(func.offsets[ebb], sink.offset());
for inst in func.layout.ebb_insts(ebb) {
emit_inst(func, inst, &mut divert, sink);
}

View File

@@ -60,7 +60,7 @@ pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> Result<CodeOffset
while let Some(ebb) = cur.next_ebb() {
// Record the offset for `ebb` and make sure we iterate until offsets are stable.
if cur.func.offsets[ebb] != offset {
assert!(
debug_assert!(
cur.func.offsets[ebb] < offset,
"Code shrinking during relaxation"
);
@@ -111,7 +111,7 @@ fn fallthroughs(func: &mut Function) {
Opcode::Fallthrough => {
// Somebody used a fall-through instruction before the branch relaxation pass.
// Make sure it is correct, i.e. the destination is the layout successor.
assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
debug_assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
}
Opcode::Jump => {
// If this is a jump to the successor EBB, change it to a fall-through.
@@ -152,13 +152,23 @@ fn relax_branch(
if let Some(enc) = isa.legal_encodings(dfg, &dfg[inst], ctrl_type).find(
|&enc| {
let range = encinfo.branch_range(enc).expect("Branch with no range");
let in_range = range.contains(offset, dest_offset);
dbg!(
" trying [{}]: {}",
encinfo.display(enc),
if in_range { "OK" } else { "out of range" }
);
in_range
if !range.contains(offset, dest_offset) {
dbg!(" trying [{}]: out of range", encinfo.display(enc));
false
} else if encinfo.operand_constraints(enc) !=
encinfo.operand_constraints(cur.func.encodings[inst])
{
// Conservatively give up if the encoding has different constraints
// than the original, so that we don't risk picking a new encoding
// which the existing operands don't satisfy. We can't check for
// validity directly because we don't have a RegDiversions active so
// we don't know which registers are actually in use.
dbg!(" trying [{}]: constraints differ", encinfo.display(enc));
false
} else {
dbg!(" trying [{}]: OK", encinfo.display(enc));
true
}
},
)
{

View File

@@ -36,8 +36,8 @@ where
/// Check if this BitSet contains the number num
pub fn contains(&self, num: u8) -> bool {
assert!((num as usize) < Self::bits());
assert!((num as usize) < Self::max_bits());
debug_assert!((num as usize) < Self::bits());
debug_assert!((num as usize) < Self::max_bits());
self.0.into() & (1 << num) != 0
}
@@ -62,8 +62,8 @@ where
/// Construct a BitSet with the half-open range [lo,hi) filled in
pub fn from_range(lo: u8, hi: u8) -> Self {
assert!(lo <= hi);
assert!((hi as usize) <= Self::bits());
debug_assert!(lo <= hi);
debug_assert!((hi as usize) <= Self::bits());
let one: T = T::from(1);
// I can't just do (one << hi) - one here as the shift may overflow
let hi_rng = if hi >= 1 {

View File

@@ -18,11 +18,12 @@ use isa::TargetIsa;
use legalize_function;
use regalloc;
use result::{CtonError, CtonResult};
use settings::FlagsOrIsa;
use settings::{FlagsOrIsa, OptLevel};
use unreachable_code::eliminate_unreachable_code;
use verifier;
use simple_gvn::do_simple_gvn;
use licm::do_licm;
use preopt::do_preopt;
use timing;
/// Persistent data structures and compilation pipeline.
@@ -87,15 +88,14 @@ impl Context {
self.verify_if(isa)?;
self.compute_cfg();
self.preopt(isa)?;
self.legalize(isa)?;
/* TODO: Enable additional optimization passes.
if isa.flags().opt_level() == OptLevel::Best {
self.compute_domtree();
self.compute_loop_analysis();
self.licm(isa)?;
self.simple_gvn(isa)?;
}
*/
self.compute_domtree();
self.eliminate_unreachable_code(isa)?;
self.regalloc(isa)?;
@@ -131,6 +131,27 @@ impl Context {
}
}
/// Run the locations verifier on the function.
pub fn verify_locations<'a>(&self, isa: &TargetIsa) -> verifier::Result {
verifier::verify_locations(isa, &self.func, None)
}
/// Run the locations verifier only if the `enable_verifier` setting is true.
pub fn verify_locations_if<'a>(&self, isa: &TargetIsa) -> CtonResult {
if isa.flags().enable_verifier() {
self.verify_locations(isa).map_err(Into::into)
} else {
Ok(())
}
}
/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
do_preopt(&mut self.func);
self.verify_if(isa)?;
Ok(())
}
/// Run the legalizer for `isa` on the function.
pub fn legalize(&mut self, isa: &TargetIsa) -> CtonResult {
// Legalization invalidates the domtree and loop_analysis by mutating the CFG.
@@ -205,13 +226,16 @@ impl Context {
/// Insert prologue and epilogues after computing the stack frame layout.
pub fn prologue_epilogue(&mut self, isa: &TargetIsa) -> CtonResult {
isa.prologue_epilogue(&mut self.func)?;
self.verify_if(isa)
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(())
}
/// Run the branch relaxation pass and return the final code size.
pub fn relax_branches(&mut self, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
let code_size = relax_branches(&mut self.func, isa)?;
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(code_size)
}

View File

@@ -256,7 +256,7 @@ pub trait Cursor {
/// Go to a specific instruction which must be inserted in the layout.
/// New instructions will be inserted before `inst`.
fn goto_inst(&mut self, inst: ir::Inst) {
assert!(self.layout().inst_ebb(inst).is_some());
debug_assert!(self.layout().inst_ebb(inst).is_some());
self.set_position(CursorPosition::At(inst));
}
@@ -287,14 +287,14 @@ pub trait Cursor {
/// At this position, instructions cannot be inserted, but `next_inst()` will move to the first
/// instruction in `ebb`.
fn goto_top(&mut self, ebb: ir::Ebb) {
assert!(self.layout().is_ebb_inserted(ebb));
debug_assert!(self.layout().is_ebb_inserted(ebb));
self.set_position(CursorPosition::Before(ebb));
}
/// Go to the bottom of `ebb` which must be inserted into the layout.
/// At this position, inserted instructions will be appended to `ebb`.
fn goto_bottom(&mut self, ebb: ir::Ebb) {
assert!(self.layout().is_ebb_inserted(ebb));
debug_assert!(self.layout().is_ebb_inserted(ebb));
self.set_position(CursorPosition::After(ebb));
}

View File

@@ -0,0 +1,542 @@
//! Compute "magic numbers" for division-by-constants transformations.
#![allow(non_snake_case)]
//----------------------------------------------------------------------
//
// Math helpers for division by (non-power-of-2) constants. This is based
// on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
// are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
// makes little difference, but the signed-vs-unsigned aspect has a large
// effect. Therefore everything is presented in the order U32 U64 S32 S64
// so as to emphasise the similarity of the U32 and U64 cases and the S32
// and S64 cases.
// Structures to hold the "magic numbers" computed.
#[derive(PartialEq, Debug)]
pub struct MU32 {
pub mulBy: u32,
pub doAdd: bool,
pub shiftBy: i32,
}
#[derive(PartialEq, Debug)]
pub struct MU64 {
pub mulBy: u64,
pub doAdd: bool,
pub shiftBy: i32,
}
#[derive(PartialEq, Debug)]
pub struct MS32 {
pub mulBy: i32,
pub shiftBy: i32,
}
#[derive(PartialEq, Debug)]
pub struct MS64 {
pub mulBy: i64,
pub shiftBy: i32,
}
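For context, here is a hedged sketch of how a MU32 result is consumed to turn an unsigned 32-bit division by a constant into a multiply and shifts, following the Hacker's Delight recipe cited in the header comment. The consuming code lives elsewhere and is not part of this file; the field names below just mirror mulBy/doAdd/shiftBy:

struct MagicU32 { mul_by: u32, do_add: bool, shift_by: i32 }

fn div_by_const(n: u32, m: &MagicU32) -> u32 {
    // High half of the 32x32 -> 64-bit product, i.e. what `umulhi` computes.
    let q = ((n as u64 * m.mul_by as u64) >> 32) as u32;
    if m.do_add {
        // "Add" case: the magic multiplier effectively needs a 33rd bit.
        (((n - q) >> 1) + q) >> (m.shift_by - 1)
    } else {
        q >> m.shift_by
    }
}

fn main() {
    // magicU32(7) == { mulBy: 0x24924925, doAdd: true, shiftBy: 3 } per the tests below.
    let by7 = MagicU32 { mul_by: 0x2492_4925, do_add: true, shift_by: 3 };
    assert_eq!(div_by_const(1000, &by7), 1000 / 7);
    // magicU32(5) == { mulBy: 0xcccccccd, doAdd: false, shiftBy: 2 }.
    let by5 = MagicU32 { mul_by: 0xcccc_cccd, do_add: false, shift_by: 2 };
    assert_eq!(div_by_const(1000, &by5), 1000 / 5);
}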
// The actual "magic number" generators follow.
pub fn magicU32(d: u32) -> MU32 {
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
let mut do_add: bool = false;
let mut p: i32 = 31;
let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
let mut q1: u32 = 0x80000000u32 / nc;
let mut r1: u32 = 0x80000000u32 - q1 * nc;
let mut q2: u32 = 0x7FFFFFFFu32 / d;
let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
loop {
p = p + 1;
if r1 >= nc - r1 {
q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
} else {
q1 = 2 * q1;
r1 = 2 * r1;
}
if r2 + 1 >= d - r2 {
if q2 >= 0x7FFFFFFFu32 {
do_add = true;
}
q2 = 2 * q2 + 1;
r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
} else {
if q2 >= 0x80000000u32 {
do_add = true;
}
q2 = u32::wrapping_mul(2, q2);
r2 = 2 * r2 + 1;
}
let delta: u32 = d - 1 - r2;
if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
break;
}
}
MU32 {
mulBy: q2 + 1,
doAdd: do_add,
shiftBy: p - 32,
}
}
pub fn magicU64(d: u64) -> MU64 {
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
let mut do_add: bool = false;
let mut p: i32 = 63;
let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
let mut q1: u64 = 0x8000000000000000u64 / nc;
let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
loop {
p = p + 1;
if r1 >= nc - r1 {
q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
} else {
q1 = 2 * q1;
r1 = 2 * r1;
}
if r2 + 1 >= d - r2 {
if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
do_add = true;
}
q2 = 2 * q2 + 1;
r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
} else {
if q2 >= 0x8000000000000000u64 {
do_add = true;
}
q2 = u64::wrapping_mul(2, q2);
r2 = 2 * r2 + 1;
}
let delta: u64 = d - 1 - r2;
if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
break;
}
}
MU64 {
mulBy: q2 + 1,
doAdd: do_add,
shiftBy: p - 64,
}
}
pub fn magicS32(d: i32) -> MS32 {
debug_assert_ne!(d, -1);
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1);
let two31: u32 = 0x80000000u32;
let mut p: i32 = 31;
let ad: u32 = i32::wrapping_abs(d) as u32;
let t: u32 = two31 + ((d as u32) >> 31);
let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
let mut q1: u32 = two31 / anc;
let mut r1: u32 = two31 - q1 * anc;
let mut q2: u32 = two31 / ad;
let mut r2: u32 = two31 - q2 * ad;
loop {
p = p + 1;
q1 = 2 * q1;
r1 = 2 * r1;
if r1 >= anc {
q1 = q1 + 1;
r1 = r1 - anc;
}
q2 = 2 * q2;
r2 = 2 * r2;
if r2 >= ad {
q2 = q2 + 1;
r2 = r2 - ad;
}
let delta: u32 = ad - r2;
if !(q1 < delta || (q1 == delta && r1 == 0)) {
break;
}
}
MS32 {
mulBy: (if d < 0 {
u32::wrapping_neg(q2 + 1)
} else {
q2 + 1
}) as i32,
shiftBy: p - 32,
}
}
pub fn magicS64(d: i64) -> MS64 {
debug_assert_ne!(d, -1);
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1);
let two63: u64 = 0x8000000000000000u64;
let mut p: i32 = 63;
let ad: u64 = i64::wrapping_abs(d) as u64;
let t: u64 = two63 + ((d as u64) >> 63);
let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
let mut q1: u64 = two63 / anc;
let mut r1: u64 = two63 - q1 * anc;
let mut q2: u64 = two63 / ad;
let mut r2: u64 = two63 - q2 * ad;
loop {
p = p + 1;
q1 = 2 * q1;
r1 = 2 * r1;
if r1 >= anc {
q1 = q1 + 1;
r1 = r1 - anc;
}
q2 = 2 * q2;
r2 = 2 * r2;
if r2 >= ad {
q2 = q2 + 1;
r2 = r2 - ad;
}
let delta: u64 = ad - r2;
if !(q1 < delta || (q1 == delta && r1 == 0)) {
break;
}
}
MS64 {
mulBy: (if d < 0 {
u64::wrapping_neg(q2 + 1)
} else {
q2 + 1
}) as i64,
shiftBy: p - 64,
}
}
#[cfg(test)]
mod tests {
use super::{magicU32, magicU64, magicS32, magicS64};
use super::{MU32, MU64, MS32, MS64};
fn mkMU32(mulBy: u32, doAdd: bool, shiftBy: i32) -> MU32 {
MU32 {
mulBy,
doAdd,
shiftBy,
}
}
fn mkMU64(mulBy: u64, doAdd: bool, shiftBy: i32) -> MU64 {
MU64 {
mulBy,
doAdd,
shiftBy,
}
}
fn mkMS32(mulBy: i32, shiftBy: i32) -> MS32 {
MS32 { mulBy, shiftBy }
}
fn mkMS64(mulBy: i64, shiftBy: i32) -> MS64 {
MS64 { mulBy, shiftBy }
}
#[test]
fn test_magicU32() {
assert_eq!(magicU32(2u32), mkMU32(0x80000000u32, false, 0));
assert_eq!(magicU32(3u32), mkMU32(0xaaaaaaabu32, false, 1));
assert_eq!(magicU32(4u32), mkMU32(0x40000000u32, false, 0));
assert_eq!(magicU32(5u32), mkMU32(0xcccccccdu32, false, 2));
assert_eq!(magicU32(6u32), mkMU32(0xaaaaaaabu32, false, 2));
assert_eq!(magicU32(7u32), mkMU32(0x24924925u32, true, 3));
assert_eq!(magicU32(9u32), mkMU32(0x38e38e39u32, false, 1));
assert_eq!(magicU32(10u32), mkMU32(0xcccccccdu32, false, 3));
assert_eq!(magicU32(11u32), mkMU32(0xba2e8ba3u32, false, 3));
assert_eq!(magicU32(12u32), mkMU32(0xaaaaaaabu32, false, 3));
assert_eq!(magicU32(25u32), mkMU32(0x51eb851fu32, false, 3));
assert_eq!(magicU32(125u32), mkMU32(0x10624dd3u32, false, 3));
assert_eq!(magicU32(625u32), mkMU32(0xd1b71759u32, false, 9));
assert_eq!(magicU32(1337u32), mkMU32(0x88233b2bu32, true, 11));
assert_eq!(magicU32(65535u32), mkMU32(0x80008001u32, false, 15));
assert_eq!(magicU32(65536u32), mkMU32(0x00010000u32, false, 0));
assert_eq!(magicU32(65537u32), mkMU32(0xffff0001u32, false, 16));
assert_eq!(magicU32(31415927u32), mkMU32(0x445b4553u32, false, 23));
assert_eq!(magicU32(0xdeadbeefu32), mkMU32(0x93275ab3u32, false, 31));
assert_eq!(magicU32(0xfffffffdu32), mkMU32(0x40000001u32, false, 30));
assert_eq!(magicU32(0xfffffffeu32), mkMU32(0x00000003u32, true, 32));
assert_eq!(magicU32(0xffffffffu32), mkMU32(0x80000001u32, false, 31));
}
#[test]
fn test_magicU64() {
assert_eq!(magicU64(2u64), mkMU64(0x8000000000000000u64, false, 0));
assert_eq!(magicU64(3u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 1));
assert_eq!(magicU64(4u64), mkMU64(0x4000000000000000u64, false, 0));
assert_eq!(magicU64(5u64), mkMU64(0xcccccccccccccccdu64, false, 2));
assert_eq!(magicU64(6u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 2));
assert_eq!(magicU64(7u64), mkMU64(0x2492492492492493u64, true, 3));
assert_eq!(magicU64(9u64), mkMU64(0xe38e38e38e38e38fu64, false, 3));
assert_eq!(magicU64(10u64), mkMU64(0xcccccccccccccccdu64, false, 3));
assert_eq!(magicU64(11u64), mkMU64(0x2e8ba2e8ba2e8ba3u64, false, 1));
assert_eq!(magicU64(12u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 3));
assert_eq!(magicU64(25u64), mkMU64(0x47ae147ae147ae15u64, true, 5));
assert_eq!(magicU64(125u64), mkMU64(0x0624dd2f1a9fbe77u64, true, 7));
assert_eq!(magicU64(625u64), mkMU64(0x346dc5d63886594bu64, false, 7));
assert_eq!(magicU64(1337u64), mkMU64(0xc4119d952866a139u64, false, 10));
assert_eq!(
magicU64(31415927u64),
mkMU64(0x116d154b9c3d2f85u64, true, 25)
);
assert_eq!(
magicU64(0x00000000deadbeefu64),
mkMU64(0x93275ab2dfc9094bu64, false, 31)
);
assert_eq!(
magicU64(0x00000000fffffffdu64),
mkMU64(0x8000000180000005u64, false, 31)
);
assert_eq!(
magicU64(0x00000000fffffffeu64),
mkMU64(0x0000000200000005u64, true, 32)
);
assert_eq!(
magicU64(0x00000000ffffffffu64),
mkMU64(0x8000000080000001u64, false, 31)
);
assert_eq!(
magicU64(0x0000000100000000u64),
mkMU64(0x0000000100000000u64, false, 0)
);
assert_eq!(
magicU64(0x0000000100000001u64),
mkMU64(0xffffffff00000001u64, false, 32)
);
assert_eq!(
magicU64(0x0ddc0ffeebadf00du64),
mkMU64(0x2788e9d394b77da1u64, true, 60)
);
assert_eq!(
magicU64(0xfffffffffffffffdu64),
mkMU64(0x4000000000000001u64, false, 62)
);
assert_eq!(
magicU64(0xfffffffffffffffeu64),
mkMU64(0x0000000000000003u64, true, 64)
);
assert_eq!(
magicU64(0xffffffffffffffffu64),
mkMU64(0x8000000000000001u64, false, 63)
);
}
#[test]
fn test_magicS32() {
assert_eq!(magicS32(-0x80000000i32), mkMS32(0x7fffffffu32 as i32, 30));
assert_eq!(magicS32(-0x7FFFFFFFi32), mkMS32(0xbfffffffu32 as i32, 29));
assert_eq!(magicS32(-0x7FFFFFFEi32), mkMS32(0x7ffffffdu32 as i32, 30));
assert_eq!(magicS32(-31415927i32), mkMS32(0xbba4baadu32 as i32, 23));
assert_eq!(magicS32(-1337i32), mkMS32(0x9df73135u32 as i32, 9));
assert_eq!(magicS32(-256i32), mkMS32(0x7fffffffu32 as i32, 7));
assert_eq!(magicS32(-5i32), mkMS32(0x99999999u32 as i32, 1));
assert_eq!(magicS32(-3i32), mkMS32(0x55555555u32 as i32, 1));
assert_eq!(magicS32(-2i32), mkMS32(0x7fffffffu32 as i32, 0));
assert_eq!(magicS32(2i32), mkMS32(0x80000001u32 as i32, 0));
assert_eq!(magicS32(3i32), mkMS32(0x55555556u32 as i32, 0));
assert_eq!(magicS32(4i32), mkMS32(0x80000001u32 as i32, 1));
assert_eq!(magicS32(5i32), mkMS32(0x66666667u32 as i32, 1));
assert_eq!(magicS32(6i32), mkMS32(0x2aaaaaabu32 as i32, 0));
assert_eq!(magicS32(7i32), mkMS32(0x92492493u32 as i32, 2));
assert_eq!(magicS32(9i32), mkMS32(0x38e38e39u32 as i32, 1));
assert_eq!(magicS32(10i32), mkMS32(0x66666667u32 as i32, 2));
assert_eq!(magicS32(11i32), mkMS32(0x2e8ba2e9u32 as i32, 1));
assert_eq!(magicS32(12i32), mkMS32(0x2aaaaaabu32 as i32, 1));
assert_eq!(magicS32(25i32), mkMS32(0x51eb851fu32 as i32, 3));
assert_eq!(magicS32(125i32), mkMS32(0x10624dd3u32 as i32, 3));
assert_eq!(magicS32(625i32), mkMS32(0x68db8badu32 as i32, 8));
assert_eq!(magicS32(1337i32), mkMS32(0x6208cecbu32 as i32, 9));
assert_eq!(magicS32(31415927i32), mkMS32(0x445b4553u32 as i32, 23));
assert_eq!(magicS32(0x7ffffffei32), mkMS32(0x80000003u32 as i32, 30));
assert_eq!(magicS32(0x7fffffffi32), mkMS32(0x40000001u32 as i32, 29));
}
#[test]
fn test_magicS64() {
assert_eq!(
magicS64(-0x8000000000000000i64),
mkMS64(0x7fffffffffffffffu64 as i64, 62)
);
assert_eq!(
magicS64(-0x7FFFFFFFFFFFFFFFi64),
mkMS64(0xbfffffffffffffffu64 as i64, 61)
);
assert_eq!(
magicS64(-0x7FFFFFFFFFFFFFFEi64),
mkMS64(0x7ffffffffffffffdu64 as i64, 62)
);
assert_eq!(
magicS64(-0x0ddC0ffeeBadF00di64),
mkMS64(0x6c3b8b1635a4412fu64 as i64, 59)
);
assert_eq!(
magicS64(-0x100000001i64),
mkMS64(0x800000007fffffffu64 as i64, 31)
);
assert_eq!(
magicS64(-0x100000000i64),
mkMS64(0x7fffffffffffffffu64 as i64, 31)
);
assert_eq!(
magicS64(-0xFFFFFFFFi64),
mkMS64(0x7fffffff7fffffffu64 as i64, 31)
);
assert_eq!(
magicS64(-0xFFFFFFFEi64),
mkMS64(0x7ffffffefffffffdu64 as i64, 31)
);
assert_eq!(
magicS64(-0xFFFFFFFDi64),
mkMS64(0x7ffffffe7ffffffbu64 as i64, 31)
);
assert_eq!(
magicS64(-0xDeadBeefi64),
mkMS64(0x6cd8a54d2036f6b5u64 as i64, 31)
);
assert_eq!(
magicS64(-31415927i64),
mkMS64(0x7749755a31e1683du64 as i64, 24)
);
assert_eq!(magicS64(-1337i64), mkMS64(0x9df731356bccaf63u64 as i64, 9));
assert_eq!(magicS64(-256i64), mkMS64(0x7fffffffffffffffu64 as i64, 7));
assert_eq!(magicS64(-5i64), mkMS64(0x9999999999999999u64 as i64, 1));
assert_eq!(magicS64(-3i64), mkMS64(0x5555555555555555u64 as i64, 1));
assert_eq!(magicS64(-2i64), mkMS64(0x7fffffffffffffffu64 as i64, 0));
assert_eq!(magicS64(2i64), mkMS64(0x8000000000000001u64 as i64, 0));
assert_eq!(magicS64(3i64), mkMS64(0x5555555555555556u64 as i64, 0));
assert_eq!(magicS64(4i64), mkMS64(0x8000000000000001u64 as i64, 1));
assert_eq!(magicS64(5i64), mkMS64(0x6666666666666667u64 as i64, 1));
assert_eq!(magicS64(6i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
assert_eq!(magicS64(7i64), mkMS64(0x4924924924924925u64 as i64, 1));
assert_eq!(magicS64(9i64), mkMS64(0x1c71c71c71c71c72u64 as i64, 0));
assert_eq!(magicS64(10i64), mkMS64(0x6666666666666667u64 as i64, 2));
assert_eq!(magicS64(11i64), mkMS64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
assert_eq!(magicS64(12i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
assert_eq!(magicS64(25i64), mkMS64(0xa3d70a3d70a3d70bu64 as i64, 4));
assert_eq!(magicS64(125i64), mkMS64(0x20c49ba5e353f7cfu64 as i64, 4));
assert_eq!(magicS64(625i64), mkMS64(0x346dc5d63886594bu64 as i64, 7));
assert_eq!(magicS64(1337i64), mkMS64(0x6208ceca9433509du64 as i64, 9));
assert_eq!(
magicS64(31415927i64),
mkMS64(0x88b68aa5ce1e97c3u64 as i64, 24)
);
assert_eq!(
magicS64(0x00000000deadbeefi64),
mkMS64(0x93275ab2dfc9094bu64 as i64, 31)
);
assert_eq!(
magicS64(0x00000000fffffffdi64),
mkMS64(0x8000000180000005u64 as i64, 31)
);
assert_eq!(
magicS64(0x00000000fffffffei64),
mkMS64(0x8000000100000003u64 as i64, 31)
);
assert_eq!(
magicS64(0x00000000ffffffffi64),
mkMS64(0x8000000080000001u64 as i64, 31)
);
assert_eq!(
magicS64(0x0000000100000000i64),
mkMS64(0x8000000000000001u64 as i64, 31)
);
assert_eq!(
magicS64(0x0000000100000001i64),
mkMS64(0x7fffffff80000001u64 as i64, 31)
);
assert_eq!(
magicS64(0x0ddc0ffeebadf00di64),
mkMS64(0x93c474e9ca5bbed1u64 as i64, 59)
);
assert_eq!(
magicS64(0x7ffffffffffffffdi64),
mkMS64(0x2000000000000001u64 as i64, 60)
);
assert_eq!(
magicS64(0x7ffffffffffffffei64),
mkMS64(0x8000000000000003u64 as i64, 62)
);
assert_eq!(
magicS64(0x7fffffffffffffffi64),
mkMS64(0x4000000000000001u64 as i64, 61)
);
}
#[test]
fn test_magic_generators_dont_panic() {
// The point of this is to check that the magic number generators
// don't panic with integer wraparounds, especially at boundary
// cases for their arguments. The actual results are thrown away.
let mut total: u64 = 0;
println!("Testing UP magicU32");
for x in 2..(200 * 1000u32) {
let m = magicU32(x);
total = total ^ (m.mulBy as u64);
total = total + (m.shiftBy as u64);
total = total - (if m.doAdd { 123 } else { 456 });
}
println!("Testing DOWN magicU32");
for x in 0..(200 * 1000u32) {
let m = magicU32(0xFFFF_FFFFu32 - x);
total = total ^ (m.mulBy as u64);
total = total + (m.shiftBy as u64);
total = total - (if m.doAdd { 123 } else { 456 });
}
println!("Testing UP magicU64");
for x in 2..(200 * 1000u64) {
let m = magicU64(x);
total = total ^ m.mulBy;
total = total + (m.shiftBy as u64);
total = total - (if m.doAdd { 123 } else { 456 });
}
println!("Testing DOWN magicU64");
for x in 0..(200 * 1000u64) {
let m = magicU64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
total = total ^ m.mulBy;
total = total + (m.shiftBy as u64);
total = total - (if m.doAdd { 123 } else { 456 });
}
println!("Testing UP magicS32");
for x in 0..(200 * 1000i32) {
let m = magicS32(-0x8000_0000i32 + x);
total = total ^ (m.mulBy as u64);
total = total + (m.shiftBy as u64);
}
println!("Testing DOWN magicS32");
for x in 0..(200 * 1000i32) {
let m = magicS32(0x7FFF_FFFFi32 - x);
total = total ^ (m.mulBy as u64);
total = total + (m.shiftBy as u64);
}
println!("Testing UP magicS64");
for x in 0..(200 * 1000i64) {
let m = magicS64(-0x8000_0000_0000_0000i64 + x);
total = total ^ (m.mulBy as u64);
total = total + (m.shiftBy as u64);
}
println!("Testing DOWN magicS64");
for x in 0..(200 * 1000i64) {
let m = magicS64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
total = total ^ (m.mulBy as u64);
total = total + (m.shiftBy as u64);
}
// Force `total` -- and hence, the entire computation -- to
// be used, so that rustc can't optimise it out.
assert_eq!(total, 7547519887532559585u64);
}
}

View File

@@ -197,7 +197,7 @@ impl DominatorTree {
}
}
assert_eq!(a.0, b.0, "Unreachable block passed to common_dominator?");
debug_assert_eq!(a.0, b.0, "Unreachable block passed to common_dominator?");
// We're in the same EBB. The common dominator is the earlier instruction.
if layout.cmp(a.1, b.1) == Ordering::Less {
@@ -241,7 +241,7 @@ impl DominatorTree {
pub fn clear(&mut self) {
self.nodes.clear();
self.postorder.clear();
assert!(self.stack.is_empty());
debug_assert!(self.stack.is_empty());
self.valid = false;
}
@@ -340,7 +340,7 @@ impl DominatorTree {
/// post-order except for the insertion of the new EBB header at the split point.
fn push_successors(&mut self, func: &Function, ebb: Ebb) {
for inst in func.layout.ebb_insts(ebb) {
match func.dfg[inst].analyze_branch(&func.dfg.value_lists) {
match func.dfg.analyze_branch(inst) {
BranchInfo::SingleDest(succ, _) => {
if self.nodes[succ].rpo_number == 0 {
self.nodes[succ].rpo_number = SEEN;
@@ -539,7 +539,7 @@ impl DominatorTreePreorder {
/// Recompute this data structure to match `domtree`.
pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
self.nodes.clear();
assert_eq!(self.stack.len(), 0);
debug_assert_eq!(self.stack.len(), 0);
// Step 1: Populate the child and sibling links.
//
@@ -557,7 +557,7 @@ impl DominatorTreePreorder {
}
// Step 2. Assign pre-order numbers from a DFS of the dominator tree.
assert!(self.stack.len() <= 1);
debug_assert!(self.stack.len() <= 1);
let mut n = 0;
while let Some(ebb) = self.stack.pop() {
n += 1;

View File

@@ -220,8 +220,8 @@ impl<T: EntityRef> ListPool<T> {
to_sclass: SizeClass,
elems_to_copy: usize,
) -> usize {
assert!(elems_to_copy <= sclass_size(from_sclass));
assert!(elems_to_copy <= sclass_size(to_sclass));
debug_assert!(elems_to_copy <= sclass_size(from_sclass));
debug_assert!(elems_to_copy <= sclass_size(to_sclass));
let new_block = self.alloc(to_sclass);
if elems_to_copy > 0 {
@@ -302,7 +302,7 @@ impl<T: EntityRef> EntityList<T> {
pub fn clear(&mut self, pool: &mut ListPool<T>) {
let idx = self.index as usize;
match pool.len_of(self) {
None => assert_eq!(idx, 0, "Invalid pool"),
None => debug_assert_eq!(idx, 0, "Invalid pool"),
Some(len) => pool.free(idx - 1, sclass_for_length(len)),
}
// Switch back to the empty list representation which has no storage.
@@ -323,7 +323,7 @@ impl<T: EntityRef> EntityList<T> {
match pool.len_of(self) {
None => {
// This is an empty list. Allocate a block and set length=1.
assert_eq!(idx, 0, "Invalid pool");
debug_assert_eq!(idx, 0, "Invalid pool");
let block = pool.alloc(sclass_for_length(1));
pool.data[block] = T::new(1);
pool.data[block + 1] = element;
@@ -359,7 +359,7 @@ impl<T: EntityRef> EntityList<T> {
match pool.len_of(self) {
None => {
// This is an empty list. Allocate a block.
assert_eq!(idx, 0, "Invalid pool");
debug_assert_eq!(idx, 0, "Invalid pool");
if count == 0 {
return &mut [];
}
@@ -410,7 +410,7 @@ impl<T: EntityRef> EntityList<T> {
}
tail[0] = element;
} else {
assert_eq!(index, seq.len());
debug_assert_eq!(index, seq.len());
}
}
@@ -420,7 +420,7 @@ impl<T: EntityRef> EntityList<T> {
{
let seq = self.as_mut_slice(pool);
len = seq.len();
assert!(index < len);
debug_assert!(index < len);
// Copy elements down.
for i in index..len - 1 {
@@ -450,7 +450,7 @@ impl<T: EntityRef> EntityList<T> {
/// the list.
pub fn swap_remove(&mut self, index: usize, pool: &mut ListPool<T>) {
let len = self.len(pool);
assert!(index < len);
debug_assert!(index < len);
if index == len - 1 {
self.remove(index, pool);
} else {

View File

@@ -61,7 +61,7 @@ macro_rules! entity_impl {
($entity:ident) => {
impl $crate::entity::EntityRef for $entity {
fn new(index: usize) -> Self {
assert!(index < (::std::u32::MAX as usize));
debug_assert!(index < (::std::u32::MAX as usize));
$entity(index as u32)
}

View File

@@ -150,7 +150,7 @@ where
// There was no previous entry for `key`. Add it to the end of `dense`.
let idx = self.dense.len();
assert!(idx <= u32::MAX as usize, "SparseMap overflow");
debug_assert!(idx <= u32::MAX as usize, "SparseMap overflow");
self.dense.push(value);
self.sparse[key] = idx as u32;
None

View File

@@ -108,7 +108,7 @@ impl ControlFlowGraph {
fn compute_ebb(&mut self, func: &Function, ebb: Ebb) {
for inst in func.layout.ebb_insts(ebb) {
match func.dfg[inst].analyze_branch(&func.dfg.value_lists) {
match func.dfg.analyze_branch(inst) {
BranchInfo::SingleDest(dest, _) => {
self.add_edge((ebb, inst), dest);
}

View File

@@ -217,11 +217,11 @@ impl DataFlowGraph {
///
/// The `dest` value can't be attached to an instruction or EBB.
pub fn change_to_alias(&mut self, dest: Value, src: Value) {
assert!(!self.value_is_attached(dest));
debug_assert!(!self.value_is_attached(dest));
// Try to create short alias chains by finding the original source value.
// This also avoids the creation of loops.
let original = self.resolve_aliases(src);
assert_ne!(
debug_assert_ne!(
dest,
original,
"Aliasing {} to {} would create a loop",
@@ -229,7 +229,7 @@ impl DataFlowGraph {
src
);
let ty = self.value_type(original);
assert_eq!(
debug_assert_eq!(
self.value_type(dest),
ty,
"Aliasing {} to {} would change its type {} to {}",
@@ -273,7 +273,7 @@ impl DataFlowGraph {
{
let original = src;
let ty = self.value_type(original);
assert_eq!(
debug_assert_eq!(
self.value_type(dest),
ty,
"Aliasing {} to {} would change its type {} to {}",
@@ -498,9 +498,9 @@ impl DataFlowGraph {
/// This is a very low-level operation. Usually, instruction results with the correct types are
/// created automatically. The `res` value must not be attached to anything else.
pub fn attach_result(&mut self, inst: Inst, res: Value) {
assert!(!self.value_is_attached(res));
debug_assert!(!self.value_is_attached(res));
let num = self.results[inst].push(res, &mut self.value_lists);
assert!(num <= u16::MAX as usize, "Too many result values");
debug_assert!(num <= u16::MAX as usize, "Too many result values");
let ty = self.value_type(res);
self.values[res] = ValueData::Inst {
ty,
@@ -533,7 +533,7 @@ impl DataFlowGraph {
.expect("Replacing detached result"),
new_value,
);
assert_eq!(
debug_assert_eq!(
attached,
old_value,
"{} wasn't detached from {}",
@@ -547,7 +547,7 @@ impl DataFlowGraph {
pub fn append_result(&mut self, inst: Inst, ty: Type) -> Value {
let res = self.values.next_key();
let num = self.results[inst].push(res, &mut self.value_lists);
assert!(num <= u16::MAX as usize, "Too many result values");
debug_assert!(num <= u16::MAX as usize, "Too many result values");
self.make_value(ValueData::Inst {
ty,
inst,
@@ -684,7 +684,7 @@ impl DataFlowGraph {
pub fn append_ebb_param(&mut self, ebb: Ebb, ty: Type) -> Value {
let param = self.values.next_key();
let num = self.ebbs[ebb].params.push(param, &mut self.value_lists);
assert!(num <= u16::MAX as usize, "Too many parameters on EBB");
debug_assert!(num <= u16::MAX as usize, "Too many parameters on EBB");
self.make_value(ValueData::Param {
ty,
num: num as u16,
@@ -761,9 +761,9 @@ impl DataFlowGraph {
///
/// In almost all cases, you should be using `append_ebb_param()` instead of this method.
pub fn attach_ebb_param(&mut self, ebb: Ebb, param: Value) {
assert!(!self.value_is_attached(param));
debug_assert!(!self.value_is_attached(param));
let num = self.ebbs[ebb].params.push(param, &mut self.value_lists);
assert!(num <= u16::MAX as usize, "Too many parameters on EBB");
debug_assert!(num <= u16::MAX as usize, "Too many parameters on EBB");
let ty = self.value_type(param);
self.values[param] = ValueData::Param {
ty,
@@ -859,7 +859,7 @@ impl DataFlowGraph {
/// to create invalid values for index padding which may be reassigned later.
#[cold]
fn set_value_type_for_parser(&mut self, v: Value, t: Type) {
debug_assert!(
assert!(
self.value_type(v) == types::VOID,
"this function is only for assigning types to previously invalid values"
);
@@ -882,7 +882,7 @@ impl DataFlowGraph {
) -> usize {
// Get the call signature if this is a function call.
if let Some(sig) = self.call_signature(inst) {
debug_assert_eq!(self.insts[inst].opcode().constraints().fixed_results(), 0);
assert_eq!(self.insts[inst].opcode().constraints().fixed_results(), 0);
for res_idx in 0..self.signatures[sig].returns.len() {
let ty = self.signatures[sig].returns[res_idx].value_type;
if let Some(v) = reuse.get(res_idx) {

View File

@@ -490,7 +490,7 @@ fn parse_float(s: &str, w: u8, t: u8) -> Result<u64, &'static str> {
significand <<= adjust;
exponent -= i32::from(adjust);
}
assert_eq!(significand >> t, 1);
debug_assert_eq!(significand >> t, 1);
// Trailing significand excludes the high bit.
let t_bits = significand & ((1 << t) - 1);
@@ -538,6 +538,17 @@ impl Ieee32 {
Ieee32(exponent << t)
}
/// Create an `Ieee32` number representing the greatest negative value
/// not convertible from f32 to a signed integer with width n.
pub fn fcvt_to_sint_negative_overflow<I: Into<i32>>(n: I) -> Ieee32 {
let n = n.into();
debug_assert!(n < 32);
debug_assert!(23 + 1 - n < 32);
Self::with_bits(
(1u32 << (32 - 1)) | Self::pow2(n - 1).0 | (1u32 << (23 + 1 - n)),
)
}
/// Return self negated.
pub fn neg(self) -> Ieee32 {
Ieee32(self.0 ^ (1 << 31))
@@ -590,6 +601,17 @@ impl Ieee64 {
Ieee64(exponent << t)
}
/// Create an `Ieee64` number representing the greatest negative value
/// not convertible from f64 to a signed integer with width n.
pub fn fcvt_to_sint_negative_overflow<I: Into<i64>>(n: I) -> Ieee64 {
let n = n.into();
debug_assert!(n < 64);
debug_assert!(52 + 1 - n < 64);
Self::with_bits(
(1u64 << (64 - 1)) | Self::pow2(n - 1).0 | (1u64 << (52 + 1 - n)),
)
}
/// Return self negated.
pub fn neg(self) -> Ieee64 {
Ieee64(self.0 ^ (1 << 63))
@@ -858,6 +880,15 @@ mod tests {
assert_eq!(Ieee32::pow2(1).neg().to_string(), "-0x1.000000p1");
}
#[test]
fn fcvt_to_sint_negative_overflow_ieee32() {
for n in &[8, 16] {
assert_eq!(-((1u32 << (n - 1)) as f32) - 1.0, unsafe {
mem::transmute(Ieee32::fcvt_to_sint_negative_overflow(*n))
});
}
}
#[test]
fn format_ieee64() {
assert_eq!(Ieee64::with_float(0.0).to_string(), "0.0");
@@ -986,4 +1017,13 @@ mod tests {
assert_eq!(Ieee64::pow2(1).neg().to_string(), "-0x1.0000000000000p1");
}
#[test]
fn fcvt_to_sint_negative_overflow_ieee64() {
for n in &[8, 16, 32] {
assert_eq!(-((1u64 << (n - 1)) as f64) - 1.0, unsafe {
mem::transmute(Ieee64::fcvt_to_sint_negative_overflow(*n))
});
}
}
}

View File

@@ -561,7 +561,7 @@ impl OpcodeConstraints {
/// Get the value type of result number `n`, having resolved the controlling type variable to
/// `ctrl_type`.
pub fn result_type(self, n: usize, ctrl_type: Type) -> Type {
assert!(n < self.fixed_results(), "Invalid result index");
debug_assert!(n < self.fixed_results(), "Invalid result index");
if let ResolvedConstraint::Bound(t) =
OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type)
{
@@ -577,7 +577,7 @@ impl OpcodeConstraints {
/// Unlike results, it is possible for some input values to vary freely within a specific
/// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant.
pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint {
assert!(
debug_assert!(
n < self.fixed_value_arguments(),
"Invalid value argument index"
);

View File

@@ -88,7 +88,7 @@ const LOCAL_LIMIT: SequenceNumber = 100 * MINOR_STRIDE;
// Compute the midpoint between `a` and `b`.
// Return `None` if the midpoint would be equal to either.
fn midpoint(a: SequenceNumber, b: SequenceNumber) -> Option<SequenceNumber> {
assert!(a < b);
debug_assert!(a < b);
// Avoid integer overflow.
let m = a + (b - a) / 2;
if m > a { Some(m) } else { None }
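The comment above is the whole point of the formula; a minimal illustration of why the midpoint is computed as `a + (b - a) / 2` rather than `(a + b) / 2` for u32 sequence numbers:

fn main() {
    let a: u32 = 3_000_000_000;
    let b: u32 = 4_000_000_000;
    // (a + b) / 2 would overflow u32 here; b - a always fits, so this cannot.
    let m = a + (b - a) / 2;
    assert_eq!(m, 3_500_000_000);
}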
@@ -148,7 +148,7 @@ impl Layout {
/// Assign a valid sequence number to `ebb` such that the numbers are still monotonic. This may
/// require renumbering.
fn assign_ebb_seq(&mut self, ebb: Ebb) {
assert!(self.is_ebb_inserted(ebb));
debug_assert!(self.is_ebb_inserted(ebb));
// Get the sequence number immediately before `ebb`, or 0.
let prev_seq = self.ebbs[ebb]
@@ -334,13 +334,13 @@ impl Layout {
/// Insert `ebb` as the last EBB in the layout.
pub fn append_ebb(&mut self, ebb: Ebb) {
assert!(
debug_assert!(
!self.is_ebb_inserted(ebb),
"Cannot append EBB that is already in the layout"
);
{
let node = &mut self.ebbs[ebb];
assert!(node.first_inst.is_none() && node.last_inst.is_none());
debug_assert!(node.first_inst.is_none() && node.last_inst.is_none());
node.prev = self.last_ebb.into();
node.next = None.into();
}
@@ -355,11 +355,11 @@ impl Layout {
/// Insert `ebb` in the layout before the existing EBB `before`.
pub fn insert_ebb(&mut self, ebb: Ebb, before: Ebb) {
assert!(
debug_assert!(
!self.is_ebb_inserted(ebb),
"Cannot insert EBB that is already in the layout"
);
assert!(
debug_assert!(
self.is_ebb_inserted(before),
"EBB Insertion point not in the layout"
);
@@ -379,11 +379,11 @@ impl Layout {
/// Insert `ebb` in the layout *after* the existing EBB `after`.
pub fn insert_ebb_after(&mut self, ebb: Ebb, after: Ebb) {
assert!(
debug_assert!(
!self.is_ebb_inserted(ebb),
"Cannot insert EBB that is already in the layout"
);
assert!(
debug_assert!(
self.is_ebb_inserted(after),
"EBB Insertion point not in the layout"
);
@@ -403,8 +403,8 @@ impl Layout {
/// Remove `ebb` from the layout.
pub fn remove_ebb(&mut self, ebb: Ebb) {
assert!(self.is_ebb_inserted(ebb), "EBB not in the layout");
assert!(self.first_inst(ebb).is_none(), "EBB must be empty.");
debug_assert!(self.is_ebb_inserted(ebb), "EBB not in the layout");
debug_assert!(self.first_inst(ebb).is_none(), "EBB must be empty.");
// Clear the `ebb` node and extract links.
let prev;
@@ -521,8 +521,8 @@ impl Layout {
/// Append `inst` to the end of `ebb`.
pub fn append_inst(&mut self, inst: Inst, ebb: Ebb) {
assert_eq!(self.inst_ebb(inst), None);
assert!(
debug_assert_eq!(self.inst_ebb(inst), None);
debug_assert!(
self.is_ebb_inserted(ebb),
"Cannot append instructions to EBB not in layout"
);
@@ -532,7 +532,7 @@ impl Layout {
let inst_node = &mut self.insts[inst];
inst_node.ebb = ebb.into();
inst_node.prev = ebb_node.last_inst;
assert!(inst_node.next.is_none());
debug_assert!(inst_node.next.is_none());
}
if ebb_node.first_inst.is_none() {
ebb_node.first_inst = inst.into();
@@ -566,7 +566,7 @@ impl Layout {
/// Insert `inst` before the instruction `before` in the same EBB.
pub fn insert_inst(&mut self, inst: Inst, before: Inst) {
assert_eq!(self.inst_ebb(inst), None);
debug_assert_eq!(self.inst_ebb(inst), None);
let ebb = self.inst_ebb(before).expect(
"Instruction before insertion point not in the layout",
);
@@ -645,7 +645,7 @@ impl Layout {
let old_ebb = self.inst_ebb(before).expect(
"The `before` instruction must be in the layout",
);
assert!(!self.is_ebb_inserted(new_ebb));
debug_assert!(!self.is_ebb_inserted(new_ebb));
// Insert new_ebb after old_ebb.
let next_ebb = self.ebbs[old_ebb].next;

View File

@@ -19,7 +19,7 @@ pub struct ProgramPoint(u32);
impl From<Inst> for ProgramPoint {
fn from(inst: Inst) -> ProgramPoint {
let idx = inst.index();
assert!(idx < (u32::MAX / 2) as usize);
debug_assert!(idx < (u32::MAX / 2) as usize);
ProgramPoint((idx * 2) as u32)
}
}
@@ -27,7 +27,7 @@ impl From<Inst> for ProgramPoint {
impl From<Ebb> for ProgramPoint {
fn from(ebb: Ebb) -> ProgramPoint {
let idx = ebb.index();
assert!(idx < (u32::MAX / 2) as usize);
debug_assert!(idx < (u32::MAX / 2) as usize);
ProgramPoint((idx * 2 + 1) as u32)
}
}
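
A standalone sketch of the packing scheme used by the two `From` impls above: instructions land on even numbers (`idx * 2`) and EBBs on odd numbers (`idx * 2 + 1`), so a single `u32` can refer to either kind of entity and be unpacked again by parity. The `unpack` helper below is illustrative only and not part of the crate.

    fn unpack(pp: u32) -> (&'static str, u32) {
        if pp & 1 == 0 {
            ("inst", pp / 2)       // even: an instruction index
        } else {
            ("ebb", (pp - 1) / 2)  // odd: an EBB index
        }
    }

    fn main() {
        assert_eq!(unpack(3 * 2), ("inst", 3));     // inst3 packs to ProgramPoint(6)
        assert_eq!(unpack(3 * 2 + 1), ("ebb", 3));  // ebb3 packs to ProgramPoint(7)
    }
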

View File

@@ -41,9 +41,9 @@ pub enum StackSlotKind {
/// A spill slot. This is a stack slot created by the register allocator.
SpillSlot,
/// A local variable. This is a chunk of local stack memory for use by the `stack_load` and
/// `stack_store` instructions.
Local,
/// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load`
/// and `stack_store` instructions.
ExplicitSlot,
/// An incoming function argument.
///
@@ -72,7 +72,7 @@ impl FromStr for StackSlotKind {
fn from_str(s: &str) -> Result<StackSlotKind, ()> {
use self::StackSlotKind::*;
match s {
"local" => Ok(Local),
"explicit_slot" => Ok(ExplicitSlot),
"spill_slot" => Ok(SpillSlot),
"incoming_arg" => Ok(IncomingArg),
"outgoing_arg" => Ok(OutgoingArg),
@@ -86,7 +86,7 @@ impl fmt::Display for StackSlotKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::StackSlotKind::*;
f.write_str(match *self {
Local => "local",
ExplicitSlot => "explicit_slot",
SpillSlot => "spill_slot",
IncomingArg => "incoming_arg",
OutgoingArg => "outgoing_arg",
@@ -112,7 +112,7 @@ pub struct StackSlotData {
///
/// For `OutgoingArg` stack slots, the offset is relative to the current function's stack
/// pointer immediately before the call.
pub offset: StackOffset,
pub offset: Option<StackOffset>,
}
impl StackSlotData {
@@ -121,7 +121,7 @@ impl StackSlotData {
StackSlotData {
kind,
size,
offset: 0,
offset: None,
}
}
@@ -139,8 +139,8 @@ impl StackSlotData {
impl fmt::Display for StackSlotData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.kind, self.size)?;
if self.offset != 0 {
write!(f, ", offset {}", self.offset)?;
if let Some(offset) = self.offset {
write!(f, ", offset {}", offset)?;
}
Ok(())
}
@@ -205,7 +205,7 @@ impl StackSlots {
/// Set the offset of a stack slot.
pub fn set_offset(&mut self, ss: StackSlot, offset: StackOffset) {
self.slots[ss].offset = offset;
self.slots[ss].offset = Some(offset);
}
/// Get an iterator over all the stack slot keys.
@@ -245,8 +245,8 @@ impl StackSlots {
/// Create a stack slot representing an incoming function argument.
pub fn make_incoming_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
let mut data = StackSlotData::new(StackSlotKind::IncomingArg, ty.bytes());
assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
data.offset = offset;
debug_assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
data.offset = Some(offset);
self.push(data)
}
@@ -262,7 +262,7 @@ impl StackSlots {
// Look for an existing outgoing stack slot with the same offset and size.
let inspos = match self.outgoing.binary_search_by_key(&(offset, size), |&ss| {
(self[ss].offset, self[ss].size)
(self[ss].offset.unwrap(), self[ss].size)
}) {
Ok(idx) => return self.outgoing[idx],
Err(idx) => idx,
@@ -270,8 +270,8 @@ impl StackSlots {
// No existing slot found. Make one and insert it into `outgoing`.
let mut data = StackSlotData::new(StackSlotKind::OutgoingArg, size);
assert!(offset <= StackOffset::max_value() - size as StackOffset);
data.offset = offset;
debug_assert!(offset <= StackOffset::max_value() - size as StackOffset);
data.offset = Some(offset);
let ss = self.slots.push(data);
self.outgoing.insert(inspos, ss);
ss
@@ -346,13 +346,13 @@ mod tests {
let ss1 = sss.get_outgoing_arg(types::I32, 4);
let ss2 = sss.get_outgoing_arg(types::I64, 8);
assert_eq!(sss[ss0].offset, 8);
assert_eq!(sss[ss0].offset, Some(8));
assert_eq!(sss[ss0].size, 4);
assert_eq!(sss[ss1].offset, 4);
assert_eq!(sss[ss1].offset, Some(4));
assert_eq!(sss[ss1].size, 4);
assert_eq!(sss[ss2].offset, 8);
assert_eq!(sss[ss2].offset, Some(8));
assert_eq!(sss[ss2].size, 8);
assert_eq!(sss.get_outgoing_arg(types::I32, 8), ss0);
@@ -368,7 +368,7 @@ mod tests {
assert_eq!(slot.alignment(8), 8);
assert_eq!(slot.alignment(16), 8);
let slot2 = StackSlotData::new(StackSlotKind::Local, 24);
let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);
assert_eq!(slot2.alignment(4), 4);
assert_eq!(slot2.alignment(8), 8);
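
The switch to `Option<StackOffset>` means a freshly created slot carries no offset until either `set_offset` or the stack layout pass assigns one. A minimal in-crate sketch, mirroring how the tests in this file use these helpers (visibility assumed to be the same as in those tests):

    let mut sss = StackSlots::new();
    let ss = sss.push(StackSlotData::new(StackSlotKind::ExplicitSlot, 8));
    assert_eq!(sss[ss].offset, None);      // not laid out yet
    sss.set_offset(ss, -8);
    assert_eq!(sss[ss].offset, Some(-8));  // now pinned 8 bytes below the frame base
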

View File

@@ -5,5 +5,5 @@ use std::fmt;
// Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `lib/cretonne/meta/cretonne/settings.py`.
// `lib/cretonne/meta/isa/arm32/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs"));

View File

@@ -5,5 +5,5 @@ use std::fmt;
// Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `lib/cretonne/meta/cretonne/settings.py`.
// `lib/cretonne/meta/isa/arm64/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));

View File

@@ -13,6 +13,7 @@ use ir::{Function, ValueLoc, Inst};
use regalloc::RegDiversions;
/// Register constraint for a single value operand or instruction result.
#[derive(PartialEq, Debug)]
pub struct OperandConstraint {
/// The kind of constraint.
pub kind: ConstraintKind,
@@ -53,7 +54,7 @@ impl OperandConstraint {
}
/// The different kinds of operand constraints.
#[derive(Clone, Copy, PartialEq, Eq)]
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum ConstraintKind {
/// This operand or result must be a register from the given register class.
Reg,
@@ -89,7 +90,7 @@ pub enum ConstraintKind {
}
/// Value operand constraints for an encoding recipe.
#[derive(Clone)]
#[derive(PartialEq, Clone)]
pub struct RecipeConstraints {
/// Constraints for the instruction's fixed value operands.
///
@@ -160,7 +161,7 @@ impl RecipeConstraints {
/// - Intel uses the address of the instruction following the branch, `origin = 2` for a 2-byte
/// branch instruction.
/// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`.
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Debug)]
pub struct BranchRange {
/// Offset in bytes from the address of the branch instruction to the origin used for computing
/// the branch displacement. This is the destination of a branch that encodes a 0 displacement.
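
To make the `origin` convention above concrete, here is a small hypothetical sketch (not crate code): the encoded displacement is measured from `branch_addr + origin`, so a displacement of zero targets that origin address.

    fn displacement(branch_addr: u32, origin: u32, target: u32) -> i32 {
        target.wrapping_sub(branch_addr.wrapping_add(origin)) as i32
    }

    fn main() {
        // Intel-style 2-byte branch at 0x1000 jumping to 0x1010: origin = 2.
        assert_eq!(displacement(0x1000, 2, 0x1010), 0xE);
        // ARM A32 branch at 0x1000 jumping to 0x1010: origin = 8.
        assert_eq!(displacement(0x1000, 8, 0x1010), 8);
    }
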

View File

@@ -225,7 +225,7 @@ impl<'a> Encodings<'a> {
self.legalize_actions[self.legalize as usize]
}
/// Check if the `rpred` recipe predicate s satisfied.
/// Check if the `rpred` recipe predicate is satisfied.
fn check_recipe(&self, rpred: RecipePredicate) -> bool {
match rpred {
Some(p) => p(self.isa_preds, self.inst),

View File

@@ -107,7 +107,7 @@ impl ArgAssigner for Args {
// Assign a stack location.
let loc = ArgumentLoc::Stack(self.offset as i32);
self.offset += self.pointer_bytes;
assert!(self.offset <= i32::MAX as u32);
debug_assert!(self.offset <= i32::MAX as u32);
loc.into()
}
}
@@ -180,15 +180,13 @@ pub fn spiderwasm_prologue_epilogue(
func: &mut ir::Function,
isa: &TargetIsa,
) -> result::CtonResult {
let (word_size, stack_align) = if isa.flags().is_64bit() {
(8, 16)
} else {
(4, 4)
};
// Spiderwasm on 32-bit x86 always aligns its stack pointer to 16 bytes.
let stack_align = 16;
let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
let bytes = StackSize::from(isa.flags().spiderwasm_prologue_words()) * word_size;
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
ss.offset = -(bytes as StackOffset);
ss.offset = Some(-(bytes as StackOffset));
func.stack_slots.push(ss);
layout_stack(&mut func.stack_slots, stack_align)?;
@@ -197,11 +195,10 @@ pub fn spiderwasm_prologue_epilogue(
/// Insert a System V-compatible prologue and epilogue.
pub fn native_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
let (word_size, stack_align) = if isa.flags().is_64bit() {
(8, 16)
} else {
(4, 4)
};
// The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
// newer versions use a 16-byte aligned stack pointer.
let stack_align = 16;
let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
let csr_type = if isa.flags().is_64bit() {
ir::types::I64
} else {
@@ -220,11 +217,11 @@ pub fn native_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> res
func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::IncomingArg,
size: csr_stack_size as u32,
offset: -csr_stack_size,
offset: Some(-csr_stack_size),
});
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
let local_stack_size = (total_stack_size - csr_stack_size) as i64;
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
// Add CSRs to function signature
let fp_arg = ir::AbiParam::special_reg(

View File

@@ -1,5 +1,6 @@
//! Encoding tables for Intel ISAs.
use bitset::BitSet;
use cursor::{Cursor, FuncCursor};
use flowgraph::ControlFlowGraph;
use ir::{self, InstBuilder};
@@ -375,13 +376,22 @@ fn expand_fcvt_to_sint(
let mut overflow_cc = FloatCC::LessThan;
let output_bits = ty.lane_bits();
let flimit = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::pow2(output_bits - 1).neg()),
// An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
ir::types::F32 => {
pos.ins().f32const(if output_bits < 32 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee32::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee32::pow2(output_bits - 1).neg()
})
}
ir::types::F64 => {
// An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
pos.ins().f64const(if output_bits < 64 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee64::with_float(-((1u64 << (output_bits - 1)) as f64) - 1.0)
Ieee64::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee64::pow2(output_bits - 1).neg()
})
@@ -393,8 +403,8 @@ fn expand_fcvt_to_sint(
// Finally, we could have a positive value that is too large.
let fzero = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::with_float(0.0)),
ir::types::F64 => pos.ins().f64const(Ieee64::with_float(0.0)),
ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
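
The reasoning behind the tighter `LessThanOrEqual` check can be spot-checked in plain Rust. Assuming `fcvt_to_sint_negative_overflow(16)` evaluates to -32769.0, which is what the comments above imply, any f32 strictly between -32769.0 and -32768.0 truncates to exactly `i16::MIN` and must not be treated as overflow, even though it is less than -2^15:

    fn main() {
        let x: f32 = -32768.5; // exactly representable in f32
        assert!(x > -32769.0 && x < -32768.0);
        assert_eq!(x.trunc(), -32768.0);
        assert_eq!(x.trunc() as i32, i32::from(i16::min_value()));
        // The old bound `x < -32768.0` would have flagged this value as an
        // overflow; `x <= -32769.0` correctly accepts it.
    }
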

View File

@@ -5,7 +5,7 @@ use std::fmt;
// Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `lib/cretonne/meta/cretonne/settings.py`.
// `lib/cretonne/meta/isa/intel/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-intel.rs"));
#[cfg(test)]

View File

@@ -252,7 +252,7 @@ pub trait TargetIsa: fmt::Display {
if func.signature.call_conv == ir::CallConv::SpiderWASM {
let bytes = StackSize::from(self.flags().spiderwasm_prologue_words()) * word_size;
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
ss.offset = -(bytes as StackOffset);
ss.offset = Some(-(bytes as StackOffset));
func.stack_slots.push(ss);
}

View File

@@ -80,7 +80,7 @@ impl ArgAssigner for Args {
// Assign a stack location.
let loc = ArgumentLoc::Stack(self.offset as i32);
self.offset += self.pointer_bytes;
assert!(self.offset <= i32::MAX as u32);
debug_assert!(self.offset <= i32::MAX as u32);
loc.into()
}
}

View File

@@ -106,7 +106,7 @@ fn put_i<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit,
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
fn put_u<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
let bits = bits as u32;
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let rd = u32::from(rd) & 0x1f;
@@ -133,7 +133,7 @@ fn put_sb<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit
let rs1 = u32::from(rs1) & 0x1f;
let rs2 = u32::from(rs2) & 0x1f;
assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
let imm = imm as u32;
// 0-6: opcode
@@ -164,7 +164,7 @@ fn put_uj<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS
let opcode5 = bits & 0x1f;
let rd = u32::from(rd) & 0x1f;
assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
let imm = imm as u32;
// 0-6: opcode

View File

@@ -1 +0,0 @@

View File

@@ -5,7 +5,7 @@ use std::fmt;
// Include code generated by `lib/cretonne/meta/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `lib/cretonne/meta/cretonne/settings.py`.
// `lib/cretonne/meta/isa/riscv/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs"));
#[cfg(test)]

View File

@@ -41,12 +41,12 @@ impl StackRef {
let slot = &frame[ss];
let offset = if slot.kind == StackSlotKind::OutgoingArg {
// Outgoing argument slots have offsets relative to our stack pointer.
slot.offset
slot.offset.unwrap()
} else {
// All other slots have offsets relative to our caller's stack frame.
// Offset where SP is pointing. (All ISAs have stacks growing downwards.)
let sp_offset = -(size as StackOffset);
slot.offset - sp_offset
slot.offset.unwrap() - sp_offset
};
StackRef {
base: StackBase::SP,

View File

@@ -86,15 +86,15 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) {
ArgumentPurpose::FramePointer => {}
ArgumentPurpose::CalleeSaved => {}
ArgumentPurpose::StructReturn => {
assert!(!has_sret, "Multiple sret arguments found");
debug_assert!(!has_sret, "Multiple sret arguments found");
has_sret = true;
}
ArgumentPurpose::VMContext => {
assert!(!has_vmctx, "Multiple vmctx arguments found");
debug_assert!(!has_vmctx, "Multiple vmctx arguments found");
has_vmctx = true;
}
ArgumentPurpose::SignatureId => {
assert!(!has_sigid, "Multiple sigid arguments found");
debug_assert!(!has_sigid, "Multiple sigid arguments found");
has_sigid = true;
}
_ => panic!("Unexpected special-purpose arg {}", abi_type),
@@ -104,7 +104,7 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) {
// Compute the value we want for `arg` from the legalized ABI parameters.
let mut get_arg = |func: &mut Function, ty| {
let abi_type = func.signature.params[abi_arg];
assert_eq!(
debug_assert_eq!(
abi_type.purpose,
ArgumentPurpose::Normal,
"Can't legalize special-purpose argument"
@@ -119,7 +119,7 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) {
let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg);
// The old `arg` is no longer an attached EBB argument, but there are probably still
// uses of the value.
assert_eq!(pos.func.dfg.resolve_aliases(arg), converted);
debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted);
}
}
@@ -139,19 +139,19 @@ fn legalize_entry_params(func: &mut Function, entry: Ebb) {
}
// These can be meaningfully added by `legalize_signature()`.
ArgumentPurpose::Link => {
assert!(!has_link, "Multiple link parameters found");
debug_assert!(!has_link, "Multiple link parameters found");
has_link = true;
}
ArgumentPurpose::StructReturn => {
assert!(!has_sret, "Multiple sret parameters found");
debug_assert!(!has_sret, "Multiple sret parameters found");
has_sret = true;
}
ArgumentPurpose::VMContext => {
assert!(!has_vmctx, "Multiple vmctx parameters found");
debug_assert!(!has_vmctx, "Multiple vmctx parameters found");
has_vmctx = true;
}
ArgumentPurpose::SignatureId => {
assert!(!has_sigid, "Multiple sigid parameters found");
debug_assert!(!has_sigid, "Multiple sigid parameters found");
has_sigid = true;
}
}
@@ -181,7 +181,7 @@ where
// We theoretically allow for call instructions that return a number of fixed results before
// the call return values. In practice, it doesn't happen.
let fixed_results = pos.func.dfg[call].opcode().constraints().fixed_results();
assert_eq!(fixed_results, 0, "Fixed results on calls not supported");
debug_assert_eq!(fixed_results, 0, "Fixed results on calls not supported");
let results = pos.func.dfg.detach_results(call);
let mut next_res = 0;
@@ -210,7 +210,7 @@ where
}
};
let v = convert_from_abi(pos, res_type, Some(res), &mut get_res);
assert_eq!(pos.func.dfg.resolve_aliases(res), v);
debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v);
}
}
@@ -239,7 +239,7 @@ where
let arg_type = match get_arg(pos.func, ty) {
Ok(v) => {
debug_assert_eq!(pos.func.dfg.value_type(v), ty);
assert_eq!(into_result, None);
debug_assert_eq!(into_result, None);
return v;
}
Err(t) => t,
@@ -275,7 +275,7 @@ where
}
// Construct a `ty` by bit-casting from an integer type.
ValueConversion::IntBits => {
assert!(!ty.is_int());
debug_assert!(!ty.is_int());
let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
pos.ins().with_results([into_result]).bitcast(ty, arg)
@@ -341,7 +341,7 @@ fn convert_to_abi<PutArg>(
convert_to_abi(pos, cfg, hi, put_arg);
}
ValueConversion::IntBits => {
assert!(!ty.is_int());
debug_assert!(!ty.is_int());
let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
let arg = pos.ins().bitcast(abi_ty, value);
convert_to_abi(pos, cfg, arg, put_arg);
@@ -556,7 +556,7 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
func.signature.returns[abi_arg]
});
assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args);
debug_assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args);
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
// the legalized signature. These values should simply be propagated from the entry block

View File

@@ -18,7 +18,7 @@ pub fn expand_global_addr(
// Unpack the instruction.
let gv = match func.dfg[inst] {
ir::InstructionData::UnaryGlobalVar { opcode, global_var } => {
assert_eq!(opcode, ir::Opcode::GlobalAddr);
debug_assert_eq!(opcode, ir::Opcode::GlobalAddr);
global_var
}
_ => panic!("Wanted global_addr: {}", func.dfg.display_inst(inst, None)),

View File

@@ -24,7 +24,7 @@ pub fn expand_heap_addr(
arg,
imm,
} => {
assert_eq!(opcode, ir::Opcode::HeapAddr);
debug_assert_eq!(opcode, ir::Opcode::HeapAddr);
(heap, arg, imm.into())
}
_ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)),

View File

@@ -107,7 +107,7 @@ pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, is
}
// Include legalization patterns that were generated by `gen_legalizer.py` from the `XForms` in
// `meta/cretonne/legalize.py`.
// `lib/cretonne/meta/base/legalize.py`.
//
// Concretely, this defines private functions `narrow()`, and `expand()`.
include!(concat!(env!("OUT_DIR"), "/legalizer.rs"));
@@ -248,7 +248,7 @@ fn expand_fconst(
_isa: &TargetIsa,
) {
let ty = func.dfg.value_type(func.dfg.first_result(inst));
assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty);
debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty);
// In the future, we may want to generate constant pool entries for these constants, but for
// now use an `iconst` and a bit cast.

View File

@@ -128,7 +128,7 @@ fn split_any(
while let Some(repair) = repairs.pop() {
for (_, inst) in cfg.pred_iter(repair.ebb) {
let branch_opc = pos.func.dfg[inst].opcode();
assert!(
debug_assert!(
branch_opc.is_branch(),
"Predecessor not a branch: {}",
pos.func.dfg.display_inst(inst, None)
@@ -199,7 +199,7 @@ fn split_value(
// This is an instruction result. See if the value was created by a `concat`
// instruction.
if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] {
assert_eq!(num, 0);
debug_assert_eq!(num, 0);
if opcode == concat {
reuse = Some((args[0], args[1]));
}

View File

@@ -1,5 +1,8 @@
//! Cretonne code generation library.
#![deny(missing_docs)]
#![deny(missing_docs,
trivial_numeric_casts,
unused_extern_crates)]
// Turns on alloc feature if no_std
#![cfg_attr(not(feature = "std"), no_std)]
@@ -46,11 +49,13 @@ mod abi;
mod bitset;
mod constant_hash;
mod context;
mod divconst_magic_numbers;
mod iterators;
mod legalizer;
mod licm;
mod partition_slice;
mod predicates;
mod preopt;
mod ref_slice;
mod regalloc;
mod scoped_hash_map;

View File

@@ -1,7 +1,7 @@
//! Predicate functions for testing instruction fields.
//!
//! This module defines functions that are used by the instruction predicates defined by
//! `lib/cretonne/meta/cretonne/predicates.py` classes.
//! `lib/cretonne/meta/cdsl/predicates.py` classes.
//!
//! The predicates that operate on integer fields use `Into<i64>` as a shared trait bound. This
//! bound is implemented by all the native integer types as well as `Imm64`.

lib/cretonne/src/preopt.rs Normal file
View File

@@ -0,0 +1,521 @@
//! A pre-legalization rewriting pass.
#![allow(non_snake_case)]
use cursor::{Cursor, FuncCursor};
use ir::dfg::ValueDef;
use ir::{Function, InstructionData, Value, DataFlowGraph, InstBuilder, Type};
use ir::Inst;
use ir::types::{I32, I64};
use ir::instructions::Opcode;
use divconst_magic_numbers::{MU32, MU64, MS32, MS64};
use divconst_magic_numbers::{magicU32, magicU64, magicS32, magicS64};
use timing;
//----------------------------------------------------------------------
//
// Pattern-match helpers and transformation for div and rem by constants.
// Simple math helpers
// If `x` is a power of two, or the negation thereof, return the power along
// with a boolean that indicates whether `x` is negative. Else return None.
#[inline]
fn isPowerOf2_S32(x: i32) -> Option<(bool, u32)> {
// We have to special-case this because abs(x) isn't representable.
if x == -0x8000_0000 {
return Some((true, 31));
}
let abs_x = i32::wrapping_abs(x) as u32;
if abs_x.is_power_of_two() {
return Some((x < 0, abs_x.trailing_zeros()));
}
None
}
// Same comments as for isPowerOf2_S32 apply.
#[inline]
fn isPowerOf2_S64(x: i64) -> Option<(bool, u32)> {
// We have to special-case this because abs(x) isn't representable.
if x == -0x8000_0000_0000_0000 {
return Some((true, 63));
}
let abs_x = i64::wrapping_abs(x) as u64;
if abs_x.is_power_of_two() {
return Some((x < 0, abs_x.trailing_zeros()));
}
None
}
#[derive(Debug)]
enum DivRemByConstInfo {
DivU32(Value, u32), // In all cases, the arguments are:
DivU64(Value, u64), // left operand, right operand
DivS32(Value, i32),
DivS64(Value, i64),
RemU32(Value, u32),
RemU64(Value, u64),
RemS32(Value, i32),
RemS64(Value, i64),
}
// Possibly create a DivRemByConstInfo from the given components, by
// figuring out which, if any, of the 8 cases apply, and also taking care to
// sanity-check the immediate.
fn package_up_divrem_info(
argL: Value,
argL_ty: Type,
argRs: i64,
isSigned: bool,
isRem: bool,
) -> Option<DivRemByConstInfo> {
let argRu: u64 = argRs as u64;
if !isSigned && argL_ty == I32 && argRu < 0x1_0000_0000 {
let con = if isRem {
DivRemByConstInfo::RemU32
} else {
DivRemByConstInfo::DivU32
};
return Some(con(argL, argRu as u32));
}
if !isSigned && argL_ty == I64 {
// unsigned 64, no range constraint
let con = if isRem {
DivRemByConstInfo::RemU64
} else {
DivRemByConstInfo::DivU64
};
return Some(con(argL, argRu));
}
if isSigned && argL_ty == I32 && (argRu <= 0x7fff_ffff || argRu >= 0xffff_ffff_8000_0000) {
let con = if isRem {
DivRemByConstInfo::RemS32
} else {
DivRemByConstInfo::DivS32
};
return Some(con(argL, argRu as i32));
}
if isSigned && argL_ty == I64 {
// signed 64, no range constraint
let con = if isRem {
DivRemByConstInfo::RemS64
} else {
DivRemByConstInfo::DivS64
};
return Some(con(argL, argRu as i64));
}
None
}
// Examine `idata` to see if it is a div or rem by a constant, and if so
// return the operands, signedness, operation size and div-vs-rem-ness in a
// handy bundle.
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
let idata: &InstructionData = &dfg[inst];
if let &InstructionData::BinaryImm { opcode, arg, imm } = idata {
let (isSigned, isRem) = match opcode {
Opcode::UdivImm => (false, false),
Opcode::UremImm => (false, true),
Opcode::SdivImm => (true, false),
Opcode::SremImm => (true, true),
_other => return None,
};
// Pull the operation size (type) from the left arg
let argL_ty = dfg.value_type(arg);
return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem);
}
// TODO: should we actually bother to do this (that is, manually match
// the case that the second argument is an iconst)? Or should we assume
// that some previous constant propagation pass has pushed all such
// immediates to their use points, creating BinaryImm instructions
// instead? For now we take the conservative approach.
if let &InstructionData::Binary { opcode, args } = idata {
let (isSigned, isRem) = match opcode {
Opcode::Udiv => (false, false),
Opcode::Urem => (false, true),
Opcode::Sdiv => (true, false),
Opcode::Srem => (true, true),
_other => return None,
};
let argR: Value = args[1];
if let Some(simm64) = get_const(argR, dfg) {
let argL: Value = args[0];
// Pull the operation size (type) from the left arg
let argL_ty = dfg.value_type(argL);
return package_up_divrem_info(argL, argL_ty, simm64, isSigned, isRem);
}
}
None
}
// Actually do the transformation given a bundle containing the relevant
// information. `divrem_info` describes a div or rem by a constant that
// `pos` currently points at, and `inst` is the associated instruction.
// `inst` is replaced by a sequence of other operations that calculate the
// same result. Note that there are various `divrem_info` cases where we
// cannot do any transformation, in which case `inst` is left unchanged.
fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) {
let isRem = match *divrem_info {
DivRemByConstInfo::DivU32(_, _) |
DivRemByConstInfo::DivU64(_, _) |
DivRemByConstInfo::DivS32(_, _) |
DivRemByConstInfo::DivS64(_, _) => false,
DivRemByConstInfo::RemU32(_, _) |
DivRemByConstInfo::RemU64(_, _) |
DivRemByConstInfo::RemS32(_, _) |
DivRemByConstInfo::RemS64(_, _) => true,
};
match divrem_info {
// -------------------- U32 --------------------
// U32 div, rem by zero: ignore
&DivRemByConstInfo::DivU32(_n1, 0) |
&DivRemByConstInfo::RemU32(_n1, 0) => {}
// U32 div by 1: identity
// U32 rem by 1: zero
&DivRemByConstInfo::DivU32(n1, 1) |
&DivRemByConstInfo::RemU32(n1, 1) => {
if isRem {
pos.func.dfg.replace(inst).iconst(I32, 0);
} else {
pos.func.dfg.replace(inst).copy(n1);
}
}
// U32 div, rem by a power-of-2
&DivRemByConstInfo::DivU32(n1, d) |
&DivRemByConstInfo::RemU32(n1, d) if d.is_power_of_two() => {
debug_assert!(d >= 2);
// compute k where d == 2^k
let k = d.trailing_zeros();
debug_assert!(k >= 1 && k <= 31);
if isRem {
let mask = (1u64 << k) - 1;
pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
} else {
pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
}
}
// U32 div, rem by non-power-of-2
&DivRemByConstInfo::DivU32(n1, d) |
&DivRemByConstInfo::RemU32(n1, d) => {
debug_assert!(d >= 3);
let MU32 {
mulBy,
doAdd,
shiftBy,
} = magicU32(d);
let qf; // final quotient
let q0 = pos.ins().iconst(I32, mulBy as i64);
let q1 = pos.ins().umulhi(n1, q0);
if doAdd {
debug_assert!(shiftBy >= 1 && shiftBy <= 32);
let t1 = pos.ins().isub(n1, q1);
let t2 = pos.ins().ushr_imm(t1, 1);
let t3 = pos.ins().iadd(t2, q1);
// I never found any case where shiftBy == 1 here.
// So there's no attempt to fold out a zero shift.
debug_assert!(shiftBy != 1);
qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64);
} else {
debug_assert!(shiftBy >= 0 && shiftBy <= 31);
// Whereas there are known cases here for shiftBy == 0.
if shiftBy > 0 {
qf = pos.ins().ushr_imm(q1, shiftBy as i64);
} else {
qf = q1;
}
}
// Now qf holds the final quotient. If necessary calculate the
// remainder instead.
if isRem {
let tt = pos.ins().imul_imm(qf, d as i64);
pos.func.dfg.replace(inst).isub(n1, tt);
} else {
pos.func.dfg.replace(inst).copy(qf);
}
}
// -------------------- U64 --------------------
// U64 div, rem by zero: ignore
&DivRemByConstInfo::DivU64(_n1, 0) |
&DivRemByConstInfo::RemU64(_n1, 0) => {}
// U64 div by 1: identity
// U64 rem by 1: zero
&DivRemByConstInfo::DivU64(n1, 1) |
&DivRemByConstInfo::RemU64(n1, 1) => {
if isRem {
pos.func.dfg.replace(inst).iconst(I64, 0);
} else {
pos.func.dfg.replace(inst).copy(n1);
}
}
// U64 div, rem by a power-of-2
&DivRemByConstInfo::DivU64(n1, d) |
&DivRemByConstInfo::RemU64(n1, d) if d.is_power_of_two() => {
debug_assert!(d >= 2);
// compute k where d == 2^k
let k = d.trailing_zeros();
debug_assert!(k >= 1 && k <= 63);
if isRem {
let mask = (1u64 << k) - 1;
pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
} else {
pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
}
}
// U64 div, rem by non-power-of-2
&DivRemByConstInfo::DivU64(n1, d) |
&DivRemByConstInfo::RemU64(n1, d) => {
debug_assert!(d >= 3);
let MU64 {
mulBy,
doAdd,
shiftBy,
} = magicU64(d);
let qf; // final quotient
let q0 = pos.ins().iconst(I64, mulBy as i64);
let q1 = pos.ins().umulhi(n1, q0);
if doAdd {
debug_assert!(shiftBy >= 1 && shiftBy <= 64);
let t1 = pos.ins().isub(n1, q1);
let t2 = pos.ins().ushr_imm(t1, 1);
let t3 = pos.ins().iadd(t2, q1);
// I never found any case where shiftBy == 1 here.
// So there's no attempt to fold out a zero shift.
debug_assert!(shiftBy != 1);
qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64);
} else {
debug_assert!(shiftBy >= 0 && shiftBy <= 63);
// Whereas there are known cases here for shiftBy == 0.
if shiftBy > 0 {
qf = pos.ins().ushr_imm(q1, shiftBy as i64);
} else {
qf = q1;
}
}
// Now qf holds the final quotient. If necessary calculate the
// remainder instead.
if isRem {
let tt = pos.ins().imul_imm(qf, d as i64);
pos.func.dfg.replace(inst).isub(n1, tt);
} else {
pos.func.dfg.replace(inst).copy(qf);
}
}
// -------------------- S32 --------------------
// S32 div, rem by zero or -1: ignore
&DivRemByConstInfo::DivS32(_n1, -1) |
&DivRemByConstInfo::RemS32(_n1, -1) |
&DivRemByConstInfo::DivS32(_n1, 0) |
&DivRemByConstInfo::RemS32(_n1, 0) => {}
// S32 div by 1: identity
// S32 rem by 1: zero
&DivRemByConstInfo::DivS32(n1, 1) |
&DivRemByConstInfo::RemS32(n1, 1) => {
if isRem {
pos.func.dfg.replace(inst).iconst(I32, 0);
} else {
pos.func.dfg.replace(inst).copy(n1);
}
}
&DivRemByConstInfo::DivS32(n1, d) |
&DivRemByConstInfo::RemS32(n1, d) => {
if let Some((isNeg, k)) = isPowerOf2_S32(d) {
// k can be 31 only in the case that d is -2^31.
debug_assert!(k >= 1 && k <= 31);
let t1 = if k - 1 == 0 {
n1
} else {
pos.ins().sshr_imm(n1, (k - 1) as i64)
};
let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64);
let t3 = pos.ins().iadd(n1, t2);
if isRem {
// S32 rem by a power-of-2
let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64);
// Curiously, we don't care here what the sign of d is.
pos.func.dfg.replace(inst).isub(n1, t4);
} else {
// S32 div by a power-of-2
let t4 = pos.ins().sshr_imm(t3, k as i64);
if isNeg {
pos.func.dfg.replace(inst).irsub_imm(t4, 0);
} else {
pos.func.dfg.replace(inst).copy(t4);
}
}
} else {
// S32 div, rem by a non-power-of-2
debug_assert!(d < -2 || d > 2);
let MS32 { mulBy, shiftBy } = magicS32(d);
let q0 = pos.ins().iconst(I32, mulBy as i64);
let q1 = pos.ins().smulhi(n1, q0);
let q2 = if d > 0 && mulBy < 0 {
pos.ins().iadd(q1, n1)
} else if d < 0 && mulBy > 0 {
pos.ins().isub(q1, n1)
} else {
q1
};
debug_assert!(shiftBy >= 0 && shiftBy <= 31);
let q3 = if shiftBy == 0 {
q2
} else {
pos.ins().sshr_imm(q2, shiftBy as i64)
};
let t1 = pos.ins().ushr_imm(q3, 31);
let qf = pos.ins().iadd(q3, t1);
// Now qf holds the final quotient. If necessary calculate
// the remainder instead.
if isRem {
let tt = pos.ins().imul_imm(qf, d as i64);
pos.func.dfg.replace(inst).isub(n1, tt);
} else {
pos.func.dfg.replace(inst).copy(qf);
}
}
}
// -------------------- S64 --------------------
// S64 div, rem by zero or -1: ignore
&DivRemByConstInfo::DivS64(_n1, -1) |
&DivRemByConstInfo::RemS64(_n1, -1) |
&DivRemByConstInfo::DivS64(_n1, 0) |
&DivRemByConstInfo::RemS64(_n1, 0) => {}
// S64 div by 1: identity
// S64 rem by 1: zero
&DivRemByConstInfo::DivS64(n1, 1) |
&DivRemByConstInfo::RemS64(n1, 1) => {
if isRem {
pos.func.dfg.replace(inst).iconst(I64, 0);
} else {
pos.func.dfg.replace(inst).copy(n1);
}
}
&DivRemByConstInfo::DivS64(n1, d) |
&DivRemByConstInfo::RemS64(n1, d) => {
if let Some((isNeg, k)) = isPowerOf2_S64(d) {
// k can be 63 only in the case that d is -2^63.
debug_assert!(k >= 1 && k <= 63);
let t1 = if k - 1 == 0 {
n1
} else {
pos.ins().sshr_imm(n1, (k - 1) as i64)
};
let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64);
let t3 = pos.ins().iadd(n1, t2);
if isRem {
// S64 rem by a power-of-2
let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k));
// Curiously, we don't care here what the sign of d is.
pos.func.dfg.replace(inst).isub(n1, t4);
} else {
// S64 div by a power-of-2
let t4 = pos.ins().sshr_imm(t3, k as i64);
if isNeg {
pos.func.dfg.replace(inst).irsub_imm(t4, 0);
} else {
pos.func.dfg.replace(inst).copy(t4);
}
}
} else {
// S64 div, rem by a non-power-of-2
debug_assert!(d < -2 || d > 2);
let MS64 { mulBy, shiftBy } = magicS64(d);
let q0 = pos.ins().iconst(I64, mulBy);
let q1 = pos.ins().smulhi(n1, q0);
let q2 = if d > 0 && mulBy < 0 {
pos.ins().iadd(q1, n1)
} else if d < 0 && mulBy > 0 {
pos.ins().isub(q1, n1)
} else {
q1
};
debug_assert!(shiftBy >= 0 && shiftBy <= 63);
let q3 = if shiftBy == 0 {
q2
} else {
pos.ins().sshr_imm(q2, shiftBy as i64)
};
let t1 = pos.ins().ushr_imm(q3, 63);
let qf = pos.ins().iadd(q3, t1);
// Now qf holds the final quotient. If necessary calculate
// the remainder instead.
if isRem {
let tt = pos.ins().imul_imm(qf, d);
pos.func.dfg.replace(inst).isub(n1, tt);
} else {
pos.func.dfg.replace(inst).copy(qf);
}
}
}
}
}
//----------------------------------------------------------------------
//
// General pattern-match helpers.
// Find out if `value` actually resolves to a constant, and if so what its
// value is.
fn get_const(value: Value, dfg: &DataFlowGraph) -> Option<i64> {
match dfg.value_def(value) {
ValueDef::Result(definingInst, resultNo) => {
let definingIData: &InstructionData = &dfg[definingInst];
if let &InstructionData::UnaryImm { opcode, imm } = definingIData {
if opcode == Opcode::Iconst && resultNo == 0 {
return Some(imm.into());
}
}
None
}
ValueDef::Param(_definingEbb, _paramNo) => None,
}
}
//----------------------------------------------------------------------
//
// The main pre-opt pass.
pub fn do_preopt(func: &mut Function) {
let _tt = timing::preopt();
let mut pos = FuncCursor::new(func);
while let Some(_ebb) = pos.next_ebb() {
while let Some(inst) = pos.next_inst() {
//-- BEGIN -- division by constants ----------------
let mb_dri = get_div_info(inst, &pos.func.dfg);
if let Some(divrem_info) = mb_dri {
do_divrem_transformation(&divrem_info, &mut pos, inst);
continue;
}
//-- END -- division by constants ------------------
}
}
}
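
Two standalone sketches of the rewrites above, checked against Rust's own integer division (which, like `udiv`/`sdiv`, truncates toward zero). The constant 0xAAAA_AAAB with a shift of 1 is the standard magic pair for an unsigned divide by 3 (the `doAdd == false` case); nothing beyond that is assumed about the exact values `magicU32` produces. `umulhi` is modelled here with a widening 64-bit multiply.

    fn umulhi32(a: u32, b: u32) -> u32 {
        ((u64::from(a) * u64::from(b)) >> 32) as u32
    }

    // DivU32 by a non-power-of-2 constant (here 3), as emitted above.
    fn udiv3(n: u32) -> u32 {
        umulhi32(n, 0xAAAA_AAAB) >> 1
    }

    // DivS32 by a positive power of 2 (here 4 = 2^k, k = 2): add a rounding
    // correction derived from the sign bits before the arithmetic shift, so the
    // result truncates toward zero like `sdiv` does.
    fn sdiv4(n: i32) -> i32 {
        let k = 2;
        let t1 = n >> (k - 1);                      // sshr_imm
        let t2 = ((t1 as u32) >> (32 - k)) as i32;  // ushr_imm
        let t3 = n.wrapping_add(t2);                // iadd
        t3 >> k                                     // sshr_imm
    }

    fn main() {
        for &n in &[0u32, 1, 2, 3, 7, 9, 10, 1000, u32::max_value()] {
            assert_eq!(udiv3(n), n / 3);
        }
        for &n in &[-9i32, -8, -7, -1, 0, 1, 7, 8, 9] {
            assert_eq!(sdiv4(n), n / 4); // Rust's `/` also truncates toward zero
        }
    }
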

View File

@@ -13,7 +13,7 @@ use ir::{AbiParam, ArgumentLoc};
use isa::{TargetIsa, RegInfo, RegClassIndex, OperandConstraint, ConstraintKind};
/// Preferred register allocation for an SSA value.
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
/// No affinity.
///

View File

@@ -1,9 +1,9 @@
//! Constructing conventional SSA form.
//! Constructing Conventional SSA form.
//!
//! Conventional SSA form is a subset of SSA form where any (transitively) phi-related values do
//! not interfere. We construct CSSA by building virtual registers that are as large as possible
//! and inserting copies where necessary such that all argument values passed to an EBB parameter
//! will belong to the same virtual register as the EBB parameter value itself.
//! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related
//! values do not interfere. We construct CSSA by building virtual registers that are as large as
//! possible and inserting copies where necessary such that all argument values passed to an EBB
//! parameter will belong to the same virtual register as the EBB parameter value itself.
use cursor::{Cursor, EncCursor};
use dbg::DisplayList;
@@ -27,7 +27,7 @@ use timing;
// The coalescing algorithm implemented follows this paper fairly closely:
//
// Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and
// live-range identification (Vol. 37, pp. 25–32). ACM. http://doi.org/10.1145/543552.512534
// live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534
//
// We use a more efficient dominator forest representation (a linear stack) described here:
//
@@ -104,7 +104,7 @@ impl Coalescing {
self.backedges.clear();
}
/// Convert `func` to conventional SSA form and build virtual registers in the process.
/// Convert `func` to Conventional SSA form and build virtual registers in the process.
pub fn conventional_ssa(
&mut self,
isa: &TargetIsa,
@@ -239,7 +239,7 @@ impl<'a> Context<'a> {
// 1. It is defined in a dominating EBB and live-in to `ebb`.
// 2. If is itself a parameter value for `ebb`. This case should already have been
// eliminated by `isolate_conflicting_params()`.
assert!(
debug_assert!(
lr.def() != ebb.into(),
"{} parameter {} was missed by isolate_conflicting_params()",
ebb,
@@ -495,8 +495,8 @@ impl<'a> Context<'a> {
// Second everything else in reverse layout order. Again, short forward branches get merged
// first. There can also be backwards branches mixed in here, though, as long as they are
// not loop backedges.
assert!(self.predecessors.is_empty());
assert!(self.backedges.is_empty());
debug_assert!(self.predecessors.is_empty());
debug_assert!(self.backedges.is_empty());
for (pred_ebb, pred_inst) in self.cfg.pred_iter(ebb) {
if self.preorder.dominates(ebb, pred_ebb) {
self.backedges.push(pred_inst);
@@ -958,7 +958,8 @@ impl VirtualCopies {
/// Indicate that `param` is now fully merged.
pub fn merged_param(&mut self, param: Value, func: &Function) {
assert_eq!(self.params.pop(), Some(param));
let popped = self.params.pop();
debug_assert_eq!(popped, Some(param));
// The domtree pre-order in `self.params` guarantees that all parameters defined at the
// same EBB will be adjacent. This means we can see when all parameters at an EBB have been

View File

@@ -23,7 +23,7 @@
//! operands are allowed to read spilled values, but each such instance must be counted as using
//! a register.
//!
//! 5. The code must be in conventional SSA form. Among other things, this means that values passed
//! 5. The code must be in Conventional SSA form. Among other things, this means that values passed
//! as arguments when branching to an EBB must belong to the same virtual register as the
//! corresponding EBB argument value.
//!
@@ -246,7 +246,7 @@ impl<'a> Context<'a> {
/// Return the set of remaining allocatable registers after filtering out the dead arguments.
fn color_entry_params(&mut self, args: &[LiveValue]) -> AvailableRegs {
let sig = &self.cur.func.signature;
assert_eq!(sig.params.len(), args.len());
debug_assert_eq!(sig.params.len(), args.len());
let mut regs = AvailableRegs::new(&self.usable_regs);
@@ -271,7 +271,7 @@ impl<'a> Context<'a> {
}
// The spiller will have assigned an incoming stack slot already.
Affinity::Stack => assert!(abi.location.is_stack()),
Affinity::Stack => debug_assert!(abi.location.is_stack()),
// This is a ghost value, unused in the function. Don't assign it to a location
// either.
Affinity::None => {}
@@ -340,7 +340,7 @@ impl<'a> Context<'a> {
} else {
// This is a multi-way branch like `br_table`. We only support arguments on
// single-destination branches.
assert_eq!(
debug_assert_eq!(
self.cur.func.dfg.inst_variable_args(inst).len(),
0,
"Can't handle EBB arguments: {}",
@@ -586,7 +586,7 @@ impl<'a> Context<'a> {
// Now handle the EBB arguments.
let br_args = self.cur.func.dfg.inst_variable_args(inst);
let dest_args = self.cur.func.dfg.ebb_params(dest);
assert_eq!(br_args.len(), dest_args.len());
debug_assert_eq!(br_args.len(), dest_args.len());
for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) {
// The first time we encounter a branch to `dest`, we get to pick the location. The
// following times we see a branch to `dest`, we must follow suit.
@@ -631,7 +631,7 @@ impl<'a> Context<'a> {
fn color_ebb_params(&mut self, inst: Inst, dest: Ebb) {
let br_args = self.cur.func.dfg.inst_variable_args(inst);
let dest_args = self.cur.func.dfg.ebb_params(dest);
assert_eq!(br_args.len(), dest_args.len());
debug_assert_eq!(br_args.len(), dest_args.len());
for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) {
match self.cur.func.locations[dest_arg] {
ValueLoc::Unassigned => {
@@ -741,7 +741,7 @@ impl<'a> Context<'a> {
// It's technically possible for a call instruction to have fixed results before the
// variable list of results, but we have no known instances of that.
// Just assume all results are variable return values.
assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len());
debug_assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len());
for (i, lv) in defs.iter().enumerate() {
let abi = self.cur.func.dfg.signatures[sig].returns[i];
if let ArgumentLoc::Reg(reg) = abi.location {
@@ -787,7 +787,7 @@ impl<'a> Context<'a> {
}
let ok = self.solver.add_fixed_output(rc, reg);
assert!(ok, "Couldn't clear fixed output interference for {}", value);
debug_assert!(ok, "Couldn't clear fixed output interference for {}", value);
}
self.cur.func.locations[value] = ValueLoc::Reg(reg);
}
@@ -858,11 +858,8 @@ impl<'a> Context<'a> {
Ok(regs) => return regs,
Err(SolverError::Divert(rc)) => {
// Do we have any live-through `rc` registers that are not already variables?
assert!(
self.try_add_var(rc, throughs),
"Ran out of registers in {}",
rc
);
let added = self.try_add_var(rc, throughs);
debug_assert!(added, "Ran out of registers in {}", rc);
}
Err(SolverError::Global(value)) => {
dbg!("Not enough global registers for {}, trying as local", value);
@@ -908,7 +905,7 @@ impl<'a> Context<'a> {
let inst = self.cur.current_inst().expect("Not on an instruction");
let ctx = self.liveness.context(&self.cur.func.layout);
match self.cur.func.dfg[inst].analyze_branch(&self.cur.func.dfg.value_lists) {
match self.cur.func.dfg.analyze_branch(inst) {
NotABranch => false,
SingleDest(ebb, _) => {
let lr = &self.liveness[value];
@@ -941,7 +938,7 @@ impl<'a> Context<'a> {
// It is very unlikely (impossible?) that we would need more than one spill per top-level
// register class, so avoid allocation by using a fixed array here.
let mut slot = [PackedOption::default(); 8];
assert!(spills <= slot.len(), "Too many spills ({})", spills);
debug_assert!(spills <= slot.len(), "Too many spills ({})", spills);
for m in self.solver.moves() {
match *m {

View File

@@ -90,7 +90,7 @@ impl Context {
verify_liveness(isa, func, cfg, &self.liveness)?;
}
// Pass: Coalesce and create conventional SSA form.
// Pass: Coalesce and create Conventional SSA form.
self.coalescing.conventional_ssa(
isa,
func,

View File

@@ -208,7 +208,7 @@ impl LiveValueTracker {
let first_arg = self.live.values.len();
for &value in dfg.ebb_params(ebb) {
let lr = &liveness[value];
assert_eq!(lr.def(), ebb.into());
debug_assert_eq!(lr.def(), ebb.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
@@ -216,7 +216,7 @@ impl LiveValueTracker {
ExpandedProgramPoint::Ebb(local_ebb) => {
// This is a dead EBB parameter which is not even live into the first
// instruction in the EBB.
assert_eq!(
debug_assert_eq!(
local_ebb,
ebb,
"EBB parameter live range ends at wrong EBB header"
@@ -261,7 +261,7 @@ impl LiveValueTracker {
) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
// Save a copy of the live values before any branches or jumps that could be somebody's
// immediate dominator.
match dfg[inst].analyze_branch(&dfg.value_lists) {
match dfg.analyze_branch(inst) {
BranchInfo::NotABranch => {}
_ => self.save_idom_live_set(inst),
}
@@ -274,7 +274,7 @@ impl LiveValueTracker {
let first_def = self.live.values.len();
for &value in dfg.inst_results(inst) {
let lr = &liveness[value];
assert_eq!(lr.def(), inst.into());
debug_assert_eq!(lr.def(), inst.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);

View File

@@ -252,7 +252,7 @@ fn extend_to_use(
forest: &mut LiveRangeForest,
) {
// This is our scratch working space, and we'll leave it empty when we return.
assert!(worklist.is_empty());
debug_assert!(worklist.is_empty());
// Extend the range locally in `ebb`.
// If there already was a live interval in that block, we're done.
@@ -339,7 +339,7 @@ impl Liveness {
let old = self.ranges.insert(
LiveRange::new(value, def.into(), affinity),
);
assert!(old.is_none(), "{} already has a live range", value);
debug_assert!(old.is_none(), "{} already has a live range", value);
}
/// Move the definition of `value` to `def`.
@@ -368,7 +368,7 @@ impl Liveness {
debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
let lr = self.ranges.get_mut(value).expect("Value has no live range");
let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
assert!(!livein, "{} should already be live in {}", value, ebb);
debug_assert!(!livein, "{} should already be live in {}", value, ebb);
&mut lr.affinity
}

View File

@@ -253,7 +253,7 @@ impl<PO: ProgramOrder> GenLiveRange<PO> {
order.cmp(to, self.def_begin) != Ordering::Less
{
let to_pp = to.into();
assert_ne!(
debug_assert_ne!(
to_pp,
self.def_begin,
"Can't use value in the defining instruction."

View File

@@ -146,7 +146,7 @@ impl<'a> Context<'a> {
);
if self.cur.func.layout.entry_block() == Some(ebb) {
assert_eq!(liveins.len(), 0);
debug_assert_eq!(liveins.len(), 0);
self.visit_entry_params(ebb, args);
} else {
self.visit_ebb_params(ebb, args);
@@ -156,7 +156,7 @@ impl<'a> Context<'a> {
/// Visit the parameters on the entry block.
/// These values have ABI constraints from the function signature.
fn visit_entry_params(&mut self, ebb: Ebb, args: &[LiveValue]) {
assert_eq!(self.cur.func.signature.params.len(), args.len());
debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
self.cur.goto_first_inst(ebb);
for (arg_idx, arg) in args.iter().enumerate() {
@@ -176,7 +176,7 @@ impl<'a> Context<'a> {
}
}
ArgumentLoc::Stack(_) => {
assert!(arg.affinity.is_stack());
debug_assert!(arg.affinity.is_stack());
}
ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
}
@@ -204,7 +204,7 @@ impl<'a> Context<'a> {
);
// Identify reload candidates.
assert!(self.candidates.is_empty());
debug_assert!(self.candidates.is_empty());
self.find_candidates(inst, constraints);
// Insert fill instructions before `inst` and replace `cand.value` with the filled value.
@@ -299,7 +299,7 @@ impl<'a> Context<'a> {
}
}
// Find reload candidates for `inst` and add them to `self.condidates`.
// Find reload candidates for `inst` and add them to `self.candidates`.
//
// These are uses of spilled values where the operand constraint requires a register.
fn find_candidates(&mut self, inst: Inst, constraints: &RecipeConstraints) {
@@ -376,7 +376,7 @@ fn handle_abi_args(
isa: &TargetIsa,
liveness: &Liveness,
) {
assert_eq!(abi_types.len(), var_args.len());
debug_assert_eq!(abi_types.len(), var_args.len());
for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
if abi.location.is_reg() {
let lv = liveness.get(arg).expect("Missing live range for ABI arg");

View File

@@ -566,7 +566,7 @@ impl Solver {
dbg!("-> converting variable {} to a fixed constraint", v);
// The spiller is responsible for ensuring that all constraints on the uses of a
// value are compatible.
assert!(
debug_assert!(
v.constraint.contains(to),
"Incompatible constraints for {}",
value
@@ -666,7 +666,7 @@ impl Solver {
// No variable, then it must be a fixed reassignment.
if let Some(a) = self.assignments.get(value) {
dbg!("-> already fixed assignment {}", a);
assert!(
debug_assert!(
constraint.contains(a.to),
"Incompatible constraints for {}",
value
@@ -709,7 +709,7 @@ impl Solver {
/// Call this method to indicate that there will be no more fixed input reassignments added
/// and prepare for the output side constraints.
pub fn inputs_done(&mut self) {
assert!(!self.has_fixed_input_conflicts());
debug_assert!(!self.has_fixed_input_conflicts());
// At this point, `regs_out` contains the `to` side of the input reassignments, and the
// `from` side has already been marked as available in `regs_in`.
@@ -747,7 +747,7 @@ impl Solver {
// interference constraints on the output side.
// Variables representing tied operands will get their `is_output` flag set again later.
if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
assert!(v.is_input);
debug_assert!(v.is_input);
v.is_output = false;
return;
}
@@ -783,7 +783,7 @@ impl Solver {
// Check if a variable was created.
if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
assert!(v.is_input);
debug_assert!(v.is_input);
v.is_output = true;
v.is_global = is_global;
return None;
@@ -1027,7 +1027,7 @@ impl Solver {
/// Returns the number of spills that had to be emitted.
pub fn schedule_moves(&mut self, regs: &AllocatableSet) -> usize {
self.collect_moves();
assert!(self.fills.is_empty());
debug_assert!(self.fills.is_empty());
let mut num_spill_slots = 0;
let mut avail = regs.clone();

View File

@@ -243,7 +243,7 @@ impl<'a> Context<'a> {
debug_assert_eq!(self.cur.current_ebb(), Some(ebb));
// We may need to resolve register constraints if there are any noteworthy uses.
assert!(self.reg_uses.is_empty());
debug_assert!(self.reg_uses.is_empty());
self.collect_reg_uses(inst, ebb, constraints);
// Calls usually have fixed register uses.

View File

@@ -141,7 +141,7 @@ impl VirtRegs {
func: &Function,
preorder: &DominatorTreePreorder,
) -> VirtReg {
assert_eq!(self.get(single), None, "Expected singleton {}", single);
debug_assert_eq!(self.get(single), None, "Expected singleton {}", single);
// Make sure `big` has a vreg.
let vreg = self.get(big).unwrap_or_else(|| {
@@ -209,7 +209,7 @@ impl VirtRegs {
}
}
assert_eq!(
debug_assert_eq!(
values.len(),
singletons + cleared,
"Can't unify partial virtual registers"

View File

@@ -29,7 +29,7 @@ pub enum CtonError {
/// Cretonne can compile very large and complicated functions, but the [implementation has
/// limits][limits] that cause compilation to fail when they are exceeded.
///
/// [limits]: http://cretonne.readthedocs.io/en/latest/langref.html#implementation-limits
/// [limits]: https://cretonne.readthedocs.io/en/latest/langref.html#implementation-limits
#[fail(display = "Implementation limit exceeded")]
ImplLimitExceeded,

View File

@@ -312,7 +312,7 @@ pub mod detail {
}
// Include code generated by `meta/gen_settings.py`. This file contains a public `Flags` struct
// with an impl for all of the settings defined in `meta/cretonne/settings.py`.
// with an impl for all of the settings defined in `lib/cretonne/meta/base/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings.rs"));
/// Wrapper containing flags and optionally a `TargetIsa` trait object.

View File

@@ -7,8 +7,8 @@ use std::cmp::{min, max};
/// Compute the stack frame layout.
///
/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Local`
/// stack slots.
/// Determine the total size of this stack frame and assign offsets to all `Spill` and
/// `Explicit` stack slots.
///
/// The total frame size will be a multiple of `alignment` which must be a power of two.
///
@@ -19,13 +19,13 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> Result<Stac
// Each object and the whole stack frame must fit in 2 GB such that any relative offset within
// the frame fits in a `StackOffset`.
let max_size = StackOffset::max_value() as StackSize;
assert!(alignment.is_power_of_two() && alignment <= max_size);
debug_assert!(alignment.is_power_of_two() && alignment <= max_size);
// We assume a stack that grows toward lower addresses as implemented by modern ISAs. The
// stack layout from high to low addresses will be:
//
// 1. incoming arguments.
// 2. spills + locals.
// 2. spills + explicits.
// 3. outgoing arguments.
//
// The incoming arguments can have both positive and negative offsets. A negative offset
@@ -48,40 +48,44 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> Result<Stac
match slot.kind {
StackSlotKind::IncomingArg => {
incoming_min = min(incoming_min, slot.offset);
incoming_min = min(incoming_min, slot.offset.unwrap());
}
StackSlotKind::OutgoingArg => {
let offset = slot.offset.checked_add(slot.size as StackOffset).ok_or(
CtonError::ImplLimitExceeded,
)?;
let offset = slot.offset
.unwrap()
.checked_add(slot.size as StackOffset)
.ok_or(CtonError::ImplLimitExceeded)?;
outgoing_max = max(outgoing_max, offset);
}
StackSlotKind::SpillSlot |
StackSlotKind::Local |
StackSlotKind::ExplicitSlot |
StackSlotKind::EmergencySlot => {
// Determine the smallest alignment of any local or spill slot.
// Determine the smallest alignment of any explicit or spill slot.
min_align = slot.alignment(min_align);
}
}
}
// Lay out spill slots and locals below the incoming arguments.
// Lay out spill slots and explicit slots below the incoming arguments.
// The offset is negative, growing downwards.
// Start with the smallest alignments for better packing.
let mut offset = incoming_min;
assert!(min_align.is_power_of_two());
debug_assert!(min_align.is_power_of_two());
while min_align <= alignment {
for ss in frame.keys() {
let slot = frame[ss].clone();
// Pick out locals and spill slots with exact alignment `min_align`.
// Pick out explicit and spill slots with exact alignment `min_align`.
match slot.kind {
StackSlotKind::SpillSlot | StackSlotKind::Local => {
StackSlotKind::SpillSlot |
StackSlotKind::ExplicitSlot |
StackSlotKind::EmergencySlot => {
if slot.alignment(alignment) != min_align {
continue;
}
}
_ => continue,
StackSlotKind::IncomingArg |
StackSlotKind::OutgoingArg => continue,
}
offset = offset.checked_sub(slot.size as StackOffset).ok_or(
@@ -110,7 +114,7 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> Result<Stac
#[cfg(test)]
mod tests {
use ir::StackSlots;
use ir::{StackSlots, StackSlotData, StackSlotKind};
use ir::types;
use super::layout_stack;
use ir::stackslot::StackOffset;
@@ -130,64 +134,82 @@ mod tests {
assert_eq!(layout_stack(sss, 1), Ok(0));
assert_eq!(layout_stack(sss, 16), Ok(0));
assert_eq!(sss[in0].offset, 0);
assert_eq!(sss[in1].offset, 8);
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
// Add some spill slots.
let ss0 = sss.make_spill_slot(types::I64);
let ss1 = sss.make_spill_slot(types::I32);
assert_eq!(layout_stack(sss, 1), Ok(12));
assert_eq!(sss[in0].offset, 0);
assert_eq!(sss[in1].offset, 8);
assert_eq!(sss[ss0].offset, -8);
assert_eq!(sss[ss1].offset, -12);
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[ss0].offset, Some(-8));
assert_eq!(sss[ss1].offset, Some(-12));
assert_eq!(layout_stack(sss, 16), Ok(16));
assert_eq!(sss[in0].offset, 0);
assert_eq!(sss[in1].offset, 8);
assert_eq!(sss[ss0].offset, -16);
assert_eq!(sss[ss1].offset, -4);
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[ss0].offset, Some(-16));
assert_eq!(sss[ss1].offset, Some(-4));
// An incoming argument with negative offset counts towards the total frame size, but it
// should still pack nicely with the spill slots.
let in2 = sss.make_incoming_arg(types::I32, -4);
assert_eq!(layout_stack(sss, 1), Ok(16));
assert_eq!(sss[in0].offset, 0);
assert_eq!(sss[in1].offset, 8);
assert_eq!(sss[in2].offset, -4);
assert_eq!(sss[ss0].offset, -12);
assert_eq!(sss[ss1].offset, -16);
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-12));
assert_eq!(sss[ss1].offset, Some(-16));
assert_eq!(layout_stack(sss, 16), Ok(16));
assert_eq!(sss[in0].offset, 0);
assert_eq!(sss[in1].offset, 8);
assert_eq!(sss[in2].offset, -4);
assert_eq!(sss[ss0].offset, -16);
assert_eq!(sss[ss1].offset, -8);
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-16));
assert_eq!(sss[ss1].offset, Some(-8));
// Finally, make sure there is room for the outgoing args.
let out0 = sss.get_outgoing_arg(types::I32, 0);
assert_eq!(layout_stack(sss, 1), Ok(20));
assert_eq!(sss[in0].offset, 0);
assert_eq!(sss[in1].offset, 8);
assert_eq!(sss[in2].offset, -4);
assert_eq!(sss[ss0].offset, -12);
assert_eq!(sss[ss1].offset, -16);
assert_eq!(sss[out0].offset, 0);
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-12));
assert_eq!(sss[ss1].offset, Some(-16));
assert_eq!(sss[out0].offset, Some(0));
assert_eq!(layout_stack(sss, 16), Ok(32));
assert_eq!(sss[in0].offset, 0);
assert_eq!(sss[in1].offset, 8);
assert_eq!(sss[in2].offset, -4);
assert_eq!(sss[ss0].offset, -16);
assert_eq!(sss[ss1].offset, -8);
assert_eq!(sss[out0].offset, 0);
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-16));
assert_eq!(sss[ss1].offset, Some(-8));
assert_eq!(sss[out0].offset, Some(0));
// Also test that an unsupported offset is rejected.
sss.get_outgoing_arg(types::I8, StackOffset::max_value() - 1);
assert_eq!(layout_stack(sss, 1), Err(CtonError::ImplLimitExceeded));
}
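// Note: the assertions above compare against Some(..) because a stack slot's
// offset field is now an Option<StackOffset>: it stays None until layout_stack
// assigns it. Illustrative fragment only (names reused from the test above,
// not new API); after a successful layout_stack call, reading an offset looks like:
//
//     let off: StackOffset = sss[ss0].offset.expect("stack layout not computed");
//
// Before layout runs, the field is None and expect()/unwrap() would panic.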
#[test]
fn slot_kinds() {
let sss = &mut StackSlots::new();
// Add some slots of various kinds.
let ss0 = sss.make_spill_slot(types::I32);
let ss1 = sss.push(StackSlotData::new(
StackSlotKind::ExplicitSlot,
types::I32.bytes(),
));
let ss2 = sss.get_emergency_slot(types::I32, &[]);
assert_eq!(layout_stack(sss, 1), Ok(12));
assert_eq!(sss[ss0].offset, Some(-4));
assert_eq!(sss[ss1].offset, Some(-8));
assert_eq!(sss[ss2].offset, Some(-12));
}
}
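For reference, here is a self-contained sketch of the smallest-alignment-first, downward-growing placement that layout_stack applies to spill, explicit, and emergency slots. It is a simplified stand-in written for illustration; the function name and the (size, alignment) tuple representation are invented for this sketch, and unlike the real pass it starts at offset 0 instead of below the incoming arguments and omits the overflow checks.

fn pack_downwards(slots: &[(u32, u32)], frame_align: u32) -> Vec<i32> {
    // Each entry is (size, alignment); returns one negative offset per slot.
    let mut offsets = vec![0i32; slots.len()];
    let mut offset: i32 = 0;
    let mut align = 1u32;
    // Place slots with the weakest alignment requirement first, as the pass
    // above does, then move on to progressively stricter alignments.
    while align <= frame_align {
        for (i, &(size, slot_align)) in slots.iter().enumerate() {
            // A slot's effective alignment is capped by the frame alignment.
            if slot_align.min(frame_align) != align {
                continue;
            }
            offset -= size as i32;
            // Round down (towards more negative offsets) to the alignment.
            offset &= -(align as i32);
            offsets[i] = offset;
        }
        align *= 2;
    }
    offsets
}

With the spill slots from the test above, pack_downwards(&[(8, 8), (4, 4)], 16) yields [-16, -4], matching the i64 and i32 offsets asserted for the 16-byte frame alignment.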

View File

@@ -55,6 +55,7 @@ define_passes!{
flowgraph: "Control flow graph",
domtree: "Dominator tree",
loop_analysis: "Loop analysis",
preopt: "Pre-legalization rewriting",
legalize: "Legalization",
gvn: "Global value numbering",
licm: "Loop invariant code motion",
@@ -186,7 +187,7 @@ mod details {
let duration = self.start.elapsed();
dbg!("timing: Ending {}", self.pass);
let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev));
assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order");
debug_assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order");
PASS_TIME.with(|rc| {
let mut table = rc.borrow_mut();
table.pass[self.pass.idx()].total += duration;
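The debug_assert_eq! above relies on timing tokens being dropped in strict LIFO order. A minimal, self-contained sketch of that RAII pattern, with a hypothetical Pass type and thread-local cell standing in for the module's real ones:

use std::cell::Cell;
use std::time::Instant;

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
struct Pass(usize);

thread_local! {
    // The pass currently being timed on this thread; Pass(0) stands in for "no pass".
    static CURRENT_PASS: Cell<Pass> = Cell::new(Pass(0));
}

// A token representing a running pass; timing ends when it is dropped.
struct TimingToken {
    pass: Pass,
    prev: Pass,
    start: Instant,
}

fn start_pass(pass: Pass) -> TimingToken {
    // Remember the pass that was already running so it can be restored on drop.
    let prev = CURRENT_PASS.with(|p| p.replace(pass));
    TimingToken { pass, prev, start: Instant::now() }
}

impl Drop for TimingToken {
    fn drop(&mut self) {
        let _duration = self.start.elapsed();
        // Restoring prev only works if tokens are dropped innermost-first,
        // which is what the debug assertion checks.
        let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev));
        debug_assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order");
        // A real implementation would also add _duration to a per-pass table here.
    }
}

Nesting then composes naturally: an inner start_pass token restores the outer pass when it is dropped, and dropping tokens out of order trips the debug assertion.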

View File

@@ -207,14 +207,14 @@ impl<'a> LocationVerifier<'a> {
slot.kind
);
}
if slot.offset != offset {
if slot.offset.unwrap() != offset {
return err!(
inst,
"ABI expects {} at stack offset {}, but {} is at {}",
value,
offset,
ss,
slot.offset
slot.offset.unwrap()
);
}
} else {
@@ -274,7 +274,7 @@ impl<'a> LocationVerifier<'a> {
};
let dfg = &self.func.dfg;
match dfg[inst].analyze_branch(&dfg.value_lists) {
match dfg.analyze_branch(inst) {
NotABranch => {
panic!(
"No branch information for {}",

View File

@@ -688,7 +688,7 @@ impl<'a> Verifier<'a> {
}
fn typecheck_variable_args(&self, inst: Inst) -> Result {
match self.func.dfg[inst].analyze_branch(&self.func.dfg.value_lists) {
match self.func.dfg.analyze_branch(inst) {
BranchInfo::SingleDest(ebb, _) => {
let iter = self.func.dfg.ebb_params(ebb).iter().map(|&v| {
self.func.dfg.value_type(v)
@@ -803,7 +803,7 @@ impl<'a> Verifier<'a> {
slot
);
}
if slot.offset != offset {
if slot.offset != Some(offset) {
return err!(
inst,
"Outgoing stack argument {} should have offset {}: {} = {}",

View File

@@ -477,29 +477,29 @@ mod tests {
f.name = ExternalName::testcase("foo");
assert_eq!(f.to_string(), "function %foo() native {\n}\n");
f.create_stack_slot(StackSlotData::new(StackSlotKind::Local, 4));
f.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4));
assert_eq!(
f.to_string(),
"function %foo() native {\n ss0 = local 4\n}\n"
"function %foo() native {\n ss0 = explicit_slot 4\n}\n"
);
let ebb = f.dfg.make_ebb();
f.layout.append_ebb(ebb);
assert_eq!(
f.to_string(),
"function %foo() native {\n ss0 = local 4\n\nebb0:\n}\n"
"function %foo() native {\n ss0 = explicit_slot 4\n\nebb0:\n}\n"
);
f.dfg.append_ebb_param(ebb, types::I8);
assert_eq!(
f.to_string(),
"function %foo() native {\n ss0 = local 4\n\nebb0(v0: i8):\n}\n"
"function %foo() native {\n ss0 = explicit_slot 4\n\nebb0(v0: i8):\n}\n"
);
f.dfg.append_ebb_param(ebb, types::F32.by(4).unwrap());
assert_eq!(
f.to_string(),
"function %foo() native {\n ss0 = local 4\n\nebb0(v0: i8, v1: f32x4):\n}\n"
"function %foo() native {\n ss0 = explicit_slot 4\n\nebb0(v0: i8, v1: f32x4):\n}\n"
);
}
}