diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index f2e4ea9607..335a69c808 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -47,4 +47,5 @@ walkdir = "2.2" [features] default = ["disas", "wasm", "cranelift-codegen/all-arch"] disas = ["capstone"] +enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"] wasm = ["wat", "cranelift-wasm"] diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 4737904eb3..0bc1c32006 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -24,7 +24,7 @@ gimli = { version = "0.20.0", default-features = false, features = ["write"], op smallvec = { version = "1.0.0" } thiserror = "1.0.4" byteorder = { version = "1.3.2", default-features = false } -peepmatic-runtime = { path = "../peepmatic/crates/runtime" } +peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true } regalloc = "0.0.23" # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary @@ -74,9 +74,12 @@ all-arch = [ # For dependent crates that want to serialize some parts of cranelift enable-serde = ["serde"] -# Recompile our optimizations that are written in the peepmatic DSL into a +# Recompile our optimizations that are written in the `peepmatic` DSL into a # compact finite-state transducer automaton. rebuild-peephole-optimizers = ["peepmatic"] +# Enable the use of `peepmatic`-generated peephole optimizers. +enable-peepmatic = ["peepmatic-runtime"] + [badges] maintenance = { status = "experimental" } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 05c5583f5b..3483219fea 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -101,7 +101,6 @@ mod licm; mod nan_canonicalization; mod num_uses; mod partition_slice; -mod peepmatic; mod postopt; mod predicates; mod redundant_reload_remover; @@ -116,6 +115,9 @@ mod topo_order; mod unreachable_code; mod value_label; +#[cfg(feature = "enable-peepmatic")] +mod peepmatic; + pub use crate::result::{CodegenError, CodegenResult}; /// Version number of this crate. 
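The feature plumbing above follows a standard Rust pattern: `enable-peepmatic` in each crate does nothing but enable the same feature in its dependencies, and `#[cfg(feature = ...)]` then selects one of two implementations at compile time. Below is a minimal, self-contained sketch of that pattern as `simple_preopt.rs` uses it further down in this diff; the module name `simplify` matches the real code, but the function bodies here are illustrative placeholders only, not code from this change.

#[cfg(feature = "enable-peepmatic")]
mod simplify {
    /// Stand-in for the peepmatic-driven peephole optimizer.
    pub fn apply_all(x: u32) -> u32 {
        x.wrapping_mul(2)
    }
}

#[cfg(not(feature = "enable-peepmatic"))]
mod simplify {
    /// Stand-in for the hand-written peephole optimizations.
    pub fn apply_all(x: u32) -> u32 {
        x << 1
    }
}

fn main() {
    // Call sites are identical regardless of which variant was compiled in,
    // which is why `do_preopt` below needs no cfg logic of its own.
    println!("{}", simplify::apply_all(21));
}

Because exactly one of the two `simplify` modules exists in any given build, both must expose the same public interface (`peephole_optimizer` and `apply_all` in the real code).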
diff --git a/cranelift/codegen/src/simple_preopt.rs b/cranelift/codegen/src/simple_preopt.rs index a77c8cb19a..7413b01e90 100644 --- a/cranelift/codegen/src/simple_preopt.rs +++ b/cranelift/codegen/src/simple_preopt.rs @@ -15,7 +15,6 @@ use crate::ir::{ Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value, }; use crate::isa::TargetIsa; -use crate::peepmatic::ValueOrInst; use crate::timing; #[inline] @@ -182,8 +181,12 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso // U32 div by 1: identity // U32 rem by 1: zero - DivRemByConstInfo::DivU32(_, 1) | DivRemByConstInfo::RemU32(_, 1) => { - unreachable!("unsigned division and remainder by one is handled in `preopt.peepmatic`"); + DivRemByConstInfo::DivU32(n1, 1) | DivRemByConstInfo::RemU32(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I32, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } } // U32 div, rem by a power-of-2 @@ -198,10 +201,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso let mask = (1u64 << k) - 1; pos.func.dfg.replace(inst).band_imm(n1, mask as i64); } else { - unreachable!( - "unsigned division by a power of two is handled in \ - `preopt.peepmatic`" - ); + pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); } } @@ -251,8 +251,12 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso // U64 div by 1: identity // U64 rem by 1: zero - DivRemByConstInfo::DivU64(_, 1) | DivRemByConstInfo::RemU64(_, 1) => { - unreachable!("unsigned division and remainder by one is handled in `preopt.peepmatic`"); + DivRemByConstInfo::DivU64(n1, 1) | DivRemByConstInfo::RemU64(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I64, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } } // U64 div, rem by a power-of-2 @@ -267,9 +271,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso let mask = (1u64 << k) - 1; pos.func.dfg.replace(inst).band_imm(n1, mask as i64); } else { - unreachable!( - "unsigned division by a power of two is handled in `preopt.peepmatic`" - ); + pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); } } @@ -322,8 +324,12 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso // S32 div by 1: identity // S32 rem by 1: zero - DivRemByConstInfo::DivS32(_, 1) | DivRemByConstInfo::RemS32(_, 1) => { - unreachable!("signed division and remainder by one is handled in `preopt.peepmatic`"); + DivRemByConstInfo::DivS32(n1, 1) | DivRemByConstInfo::RemS32(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I32, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } } DivRemByConstInfo::DivS32(n1, d) | DivRemByConstInfo::RemS32(n1, d) => { @@ -393,8 +399,12 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso // S64 div by 1: identity // S64 rem by 1: zero - DivRemByConstInfo::DivS64(_, 1) | DivRemByConstInfo::RemS64(_, 1) => { - unreachable!("division and remaineder by one are handled in `preopt.peepmatic`"); + DivRemByConstInfo::DivS64(n1, 1) | DivRemByConstInfo::RemS64(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I64, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } } DivRemByConstInfo::DivS64(n1, d) | DivRemByConstInfo::RemS64(n1, d) => { @@ -598,6 +608,416 @@ fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block, 
cfg.recompute_block(pos.func, block); } +#[cfg(feature = "enable-peepmatic")] +mod simplify { + use super::*; + use crate::peepmatic::ValueOrInst; + + pub type PeepholeOptimizer<'a, 'b> = + peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>; + + pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> { + crate::peepmatic::preopt(isa) + } + + pub fn apply_all<'a, 'b>( + optimizer: &mut PeepholeOptimizer<'a, 'b>, + pos: &mut FuncCursor<'a>, + inst: Inst, + _native_word_width: u32, + ) { + // After we apply one optimization, that might make another + // optimization applicable. Keep running the peephole optimizer + // until either: + // + // * No optimization applied, and therefore it doesn't make sense to + // try again, because no optimization will apply again. + // + // * Or when we replaced an instruction with an alias to an existing + // value, because we already ran the peephole optimizer over the + // aliased value's instruction in an early part of the traversal + // over the function. + while let Some(ValueOrInst::Inst(new_inst)) = + optimizer.apply_one(pos, ValueOrInst::Inst(inst)) + { + // We transplanted a new instruction into the current + // instruction, so the "new" instruction is actually the same + // one, just with different data. + debug_assert_eq!(new_inst, inst); + } + debug_assert_eq!(pos.current_inst(), Some(inst)); + } +} + +#[cfg(not(feature = "enable-peepmatic"))] +mod simplify { + use super::*; + use crate::ir::{ + dfg::ValueDef, + immediates, + instructions::{Opcode, ValueList}, + types::{I16, I32, I8}, + }; + use std::marker::PhantomData; + + pub struct PeepholeOptimizer<'a, 'b> { + phantom: PhantomData<(&'a (), &'b ())>, + } + + pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> { + PeepholeOptimizer { + phantom: PhantomData, + } + } + + pub fn apply_all<'a, 'b>( + _optimizer: &mut PeepholeOptimizer<'a, 'b>, + pos: &mut FuncCursor<'a>, + inst: Inst, + native_word_width: u32, + ) { + simplify(pos, inst, native_word_width); + branch_opt(pos, inst); + } + + #[inline] + fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> { + if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) { + if let InstructionData::UnaryImm { + opcode: Opcode::Iconst, + imm, + } = dfg[candidate_inst] + { + return Some(imm); + } + } + None + } + + /// Try to transform [(x << N) >> N] into a (un)signed-extending move. + /// Returns true if the final instruction has been converted to such a move. + fn try_fold_extended_move( + pos: &mut FuncCursor, + inst: Inst, + opcode: Opcode, + arg: Value, + imm: immediates::Imm64, + ) -> bool { + if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { + if let InstructionData::BinaryImm { + opcode: Opcode::IshlImm, + arg: prev_arg, + imm: prev_imm, + } = &pos.func.dfg[arg_inst] + { + if imm != *prev_imm { + return false; + } + + let dest_ty = pos.func.dfg.ctrl_typevar(inst); + if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() { + return false; + } + + let imm_bits: i64 = imm.into(); + let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) { + 8 => I8, + 16 => I16, + 32 => I32, + _ => return false, + }; + let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap(); + + // This becomes a no-op, since ireduce_ty has a smaller lane width than + // the argument type (also the destination type).
+ let arg = *prev_arg; + let narrower_arg = pos.ins().ireduce(ireduce_ty, arg); + + if opcode == Opcode::UshrImm { + pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg); + } else { + pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg); + } + return true; + } + } + false + } + + /// Apply basic simplifications. + /// + /// This folds constants with arithmetic to form `_imm` instructions, and other minor + /// simplifications. + /// + /// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the + /// controlling type's width of the instruction. This would result in an illegal instruction that + /// would likely be expanded back into an instruction on smaller types with the same initial + /// opcode, creating unnecessary churn. + fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) { + match pos.func.dfg[inst] { + InstructionData::Binary { opcode, args } => { + if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { + let new_opcode = match opcode { + Opcode::Iadd => Opcode::IaddImm, + Opcode::Imul => Opcode::ImulImm, + Opcode::Sdiv => Opcode::SdivImm, + Opcode::Udiv => Opcode::UdivImm, + Opcode::Srem => Opcode::SremImm, + Opcode::Urem => Opcode::UremImm, + Opcode::Band => Opcode::BandImm, + Opcode::Bor => Opcode::BorImm, + Opcode::Bxor => Opcode::BxorImm, + Opcode::Rotl => Opcode::RotlImm, + Opcode::Rotr => Opcode::RotrImm, + Opcode::Ishl => Opcode::IshlImm, + Opcode::Ushr => Opcode::UshrImm, + Opcode::Sshr => Opcode::SshrImm, + Opcode::Isub => { + imm = imm.wrapping_neg(); + Opcode::IaddImm + } + Opcode::Ifcmp => Opcode::IfcmpImm, + _ => return, + }; + let ty = pos.func.dfg.ctrl_typevar(inst); + if ty.bytes() <= native_word_width { + pos.func + .dfg + .replace(inst) + .BinaryImm(new_opcode, ty, imm, args[0]); + + // Repeat for BinaryImm simplification. + simplify(pos, inst, native_word_width); + } + } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) { + let new_opcode = match opcode { + Opcode::Iadd => Opcode::IaddImm, + Opcode::Imul => Opcode::ImulImm, + Opcode::Band => Opcode::BandImm, + Opcode::Bor => Opcode::BorImm, + Opcode::Bxor => Opcode::BxorImm, + Opcode::Isub => Opcode::IrsubImm, + _ => return, + }; + let ty = pos.func.dfg.ctrl_typevar(inst); + if ty.bytes() <= native_word_width { + pos.func + .dfg + .replace(inst) + .BinaryImm(new_opcode, ty, imm, args[1]); + } + } + } + + InstructionData::Unary { opcode, arg } => { + if let Opcode::AdjustSpDown = opcode { + if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) { + // Note this works for both positive and negative immediate values. 
+ pos.func.dfg.replace(inst).adjust_sp_down_imm(imm); + } + } + } + + InstructionData::BinaryImm { opcode, arg, imm } => { + let ty = pos.func.dfg.ctrl_typevar(inst); + + let mut arg = arg; + let mut imm = imm; + match opcode { + Opcode::IaddImm + | Opcode::ImulImm + | Opcode::BorImm + | Opcode::BandImm + | Opcode::BxorImm => { + // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x) + if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { + if let InstructionData::BinaryImm { + opcode: prev_opcode, + arg: prev_arg, + imm: prev_imm, + } = &pos.func.dfg[arg_inst] + { + if opcode == *prev_opcode + && ty == pos.func.dfg.ctrl_typevar(arg_inst) + { + let lhs: i64 = imm.into(); + let rhs: i64 = (*prev_imm).into(); + let new_imm = match opcode { + Opcode::BorImm => lhs | rhs, + Opcode::BandImm => lhs & rhs, + Opcode::BxorImm => lhs ^ rhs, + Opcode::IaddImm => lhs.wrapping_add(rhs), + Opcode::ImulImm => lhs.wrapping_mul(rhs), + _ => panic!("can't happen"), + }; + let new_imm = immediates::Imm64::from(new_imm); + let new_arg = *prev_arg; + pos.func + .dfg + .replace(inst) + .BinaryImm(opcode, ty, new_imm, new_arg); + imm = new_imm; + arg = new_arg; + } + } + } + } + + Opcode::UshrImm | Opcode::SshrImm => { + if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width + && try_fold_extended_move(pos, inst, opcode, arg, imm) + { + return; + } + } + + _ => {} + }; + + // Replace operations that are no-ops. + match (opcode, imm.into()) { + (Opcode::IaddImm, 0) + | (Opcode::ImulImm, 1) + | (Opcode::SdivImm, 1) + | (Opcode::UdivImm, 1) + | (Opcode::BorImm, 0) + | (Opcode::BandImm, -1) + | (Opcode::BxorImm, 0) + | (Opcode::RotlImm, 0) + | (Opcode::RotrImm, 0) + | (Opcode::IshlImm, 0) + | (Opcode::UshrImm, 0) + | (Opcode::SshrImm, 0) => { + // Alias the result value with the original argument. + replace_single_result_with_alias(&mut pos.func.dfg, inst, arg); + } + (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => { + // Replace by zero. + pos.func.dfg.replace(inst).iconst(ty, 0); + } + (Opcode::BorImm, -1) => { + // Replace by minus one. + pos.func.dfg.replace(inst).iconst(ty, -1); + } + _ => {} + } + } + + InstructionData::IntCompare { opcode, cond, args } => { + debug_assert_eq!(opcode, Opcode::Icmp); + if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { + if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width { + pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm); + } + } + } + + InstructionData::CondTrap { .. } + | InstructionData::Branch { .. } + | InstructionData::Ternary { + opcode: Opcode::Select, + .. + } => { + // Fold away a redundant `bint`. + let condition_def = { + let args = pos.func.dfg.inst_args(inst); + pos.func.dfg.value_def(args[0]) + }; + if let ValueDef::Result(def_inst, _) = condition_def { + if let InstructionData::Unary { + opcode: Opcode::Bint, + arg: bool_val, + } = pos.func.dfg[def_inst] + { + let args = pos.func.dfg.inst_args_mut(inst); + args[0] = bool_val; + } + } + } + + _ => {} + } + } + + struct BranchOptInfo { + br_inst: Inst, + cmp_arg: Value, + args: ValueList, + new_opcode: Opcode, + } + + /// Fold comparisons into branch operations when possible. + /// + /// This matches against operations which compare against zero, then use the + /// result in a `brz` or `brnz` branch. It folds those two operations into a + /// single `brz` or `brnz`. + fn branch_opt(pos: &mut FuncCursor, inst: Inst) { + let mut info = if let InstructionData::Branch { + opcode: br_opcode, + args: ref br_args, + .. 
+ } = pos.func.dfg[inst] + { + let first_arg = { + let args = pos.func.dfg.inst_args(inst); + args[0] + }; + + let icmp_inst = + if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) { + icmp_inst + } else { + return; + }; + + if let InstructionData::IntCompareImm { + opcode: Opcode::IcmpImm, + arg: cmp_arg, + cond: cmp_cond, + imm: cmp_imm, + } = pos.func.dfg[icmp_inst] + { + let cmp_imm: i64 = cmp_imm.into(); + if cmp_imm != 0 { + return; + } + + // icmp_imm returns non-zero when the comparison is true. So, if + // we're branching on zero, we need to invert the condition. + let cond = match br_opcode { + Opcode::Brz => cmp_cond.inverse(), + Opcode::Brnz => cmp_cond, + _ => return, + }; + + let new_opcode = match cond { + IntCC::Equal => Opcode::Brz, + IntCC::NotEqual => Opcode::Brnz, + _ => return, + }; + + BranchOptInfo { + br_inst: inst, + cmp_arg, + args: br_args.clone(), + new_opcode, + } + } else { + return; + } + } else { + return; + }; + + info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg; + if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] { + *opcode = info.new_opcode; + } else { + panic!(); + } + } +} + /// The main pre-opt pass. pub fn do_preopt<'func, 'isa>( func: &'func mut Function, @@ -607,30 +1027,12 @@ pub fn do_preopt<'func, 'isa>( let _tt = timing::preopt(); let mut pos = FuncCursor::new(func); - let mut preopt = crate::peepmatic::preopt(isa); + let native_word_width = isa.pointer_bytes() as u32; + let mut optimizer = simplify::peephole_optimizer(isa); while let Some(block) = pos.next_block() { while let Some(inst) = pos.next_inst() { - // After we apply one optimization, that might make another - // optimization applicable. Keep running the peephole optimizer - // until either: - // - // * No optimization applied, and therefore it doesn't make sense to - // try again, because no optimization will apply again. - // - // * Or when we replaced an instruction with an alias to an existing - // value, because we already ran the peephole optimizer over the - // aliased value's instruction in an early part of the traversal - // over the function. - while let Some(ValueOrInst::Inst(new_inst)) = - preopt.apply_one(&mut pos, ValueOrInst::Inst(inst)) - { - // We transplanted a new instruction into the current - // instruction, so the "new" instruction is actually the same - // one, just with different data. - debug_assert_eq!(new_inst, inst); - } - debug_assert_eq!(pos.current_inst(), Some(inst)); + simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width); // Try to transform divide-by-constant into simpler operations. 
if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) { diff --git a/cranelift/filetests/Cargo.toml b/cranelift/filetests/Cargo.toml index 481401cf8a..705c31fc61 100644 --- a/cranelift/filetests/Cargo.toml +++ b/cranelift/filetests/Cargo.toml @@ -26,3 +26,6 @@ num_cpus = "1.8.0" region = "2.1.2" target-lexicon = "0.10" thiserror = "1.0.15" + +[features] +enable-peepmatic = [] diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif index e59226c7de..dcf6c77e9a 100644 --- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif @@ -6,9 +6,9 @@ function u0:0(i8) -> i8 fast { block0(v0: i8): v1 = iconst.i8 0 v2 = isub v1, v0 - ; check: v4 = uextend.i32 v0 - ; nextln: v6 = iconst.i32 0 - ; nextln: v5 = isub v6, v4 - ; nextln: v2 = ireduce.i8 v5 + ; check: uextend.i32 + ; nextln: iconst.i32 + ; nextln: isub + ; nextln: ireduce.i8 return v2 } diff --git a/cranelift/filetests/filetests/peepmatic/branch.clif b/cranelift/filetests/filetests/peepmatic/branch.clif new file mode 100644 index 0000000000..0f68bbe9cb --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/branch.clif @@ -0,0 +1,81 @@ +test peepmatic +target x86_64 + +function %icmp_to_brz_fold(i32) -> i32 { +block0(v0: i32): + v1 = icmp_imm eq v0, 0 + brnz v1, block1 + jump block2 +block1: + v3 = iconst.i32 1 + return v3 +block2: + v4 = iconst.i32 2 + return v4 +} +; sameln: function %icmp_to_brz_fold +; nextln: block0(v0: i32): +; nextln: v1 = icmp_imm eq v0, 0 +; nextln: brnz v0, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v3 = iconst.i32 1 +; nextln: return v3 +; nextln: +; nextln: block2: +; nextln: v4 = iconst.i32 2 +; nextln: return v4 +; nextln: } + +function %icmp_to_brz_inverted_fold(i32) -> i32 { +block0(v0: i32): + v1 = icmp_imm ne v0, 0 + brz v1, block1 + jump block2 +block1: + v3 = iconst.i32 1 + return v3 +block2: + v4 = iconst.i32 2 + return v4 +} +; sameln: function %icmp_to_brz_inve +; nextln: block0(v0: i32): +; nextln: v1 = icmp_imm ne v0, 0 +; nextln: brnz v0, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v3 = iconst.i32 1 +; nextln: return v3 +; nextln: +; nextln: block2: +; nextln: v4 = iconst.i32 2 +; nextln: return v4 +; nextln: } + +function %br_icmp_inversion(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + br_icmp ugt v0, v1, block1 + jump block2 +block1: + v2 = iconst.i32 1 + return v2 +block2: + v3 = iconst.i32 2 + return v3 +} +; sameln: function %br_icmp_inversio +; nextln: block0(v0: i32, v1: i32): +; nextln: br_icmp ule v0, v1, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v2 = iconst.i32 1 +; nextln: return v2 +; nextln: +; nextln: block2: +; nextln: v3 = iconst.i32 2 +; nextln: return v3 +; nextln: } diff --git a/cranelift/filetests/filetests/peepmatic/div_by_const_indirect.clif b/cranelift/filetests/filetests/peepmatic/div_by_const_indirect.clif new file mode 100644 index 0000000000..ba65b2418c --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/div_by_const_indirect.clif @@ -0,0 +1,55 @@ +test peepmatic +target x86_64 baseline + +; Cases where the denominator is created by an iconst + +function %indir_udiv32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 7 + v2 = udiv v0, v1 + ; check: v4 = iconst.i32 0x2492_4925 + ; nextln: v5 = umulhi v0, v4 + ; nextln: v6 = isub v0, v5 + ; nextln: v7 = ushr_imm v6, 1 + ; nextln: v8 = iadd v7, v5 + ; nextln: v9 = ushr_imm v8, 2 + ; nextln: v2 
-> v9 + return v2 +} + +function %indir_sdiv32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -17 + v2 = sdiv v0, v1 + ; check: v4 = iconst.i32 0xffff_ffff_8787_8787 + ; nextln: v5 = smulhi v0, v4 + ; nextln: v6 = sshr_imm v5, 3 + ; nextln: v7 = ushr_imm v6, 31 + ; nextln: v8 = iadd v6, v7 + ; nextln: v2 -> v8 + return v2 +} + +function %indir_udiv64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1337 + v2 = udiv v0, v1 + ; check: v4 = iconst.i64 0xc411_9d95_2866_a139 + ; nextln: v5 = umulhi v0, v4 + ; nextln: v6 = ushr_imm v5, 10 + ; nextln: v2 -> v6 + return v2 +} + +function %indir_sdiv64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -90210 + v2 = sdiv v0, v1 + ; check: v4 = iconst.i64 0xd181_4ee8_939c_b8bb + ; nextln: v5 = smulhi v0, v4 + ; nextln: v6 = sshr_imm v5, 14 + ; nextln: v7 = ushr_imm v6, 63 + ; nextln: v8 = iadd v6, v7 + ; nextln: v2 -> v8 + return v2 +} diff --git a/cranelift/filetests/filetests/peepmatic/div_by_const_non_power_of_2.clif b/cranelift/filetests/filetests/peepmatic/div_by_const_non_power_of_2.clif new file mode 100644 index 0000000000..0759f92ca9 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/div_by_const_non_power_of_2.clif @@ -0,0 +1,266 @@ +test peepmatic +target i686 baseline + +; -------- U32 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_udiv32_p7(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 2 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift) +function %t_udiv32_p125(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 125 + ; check: iconst.i32 0x1062_4dd3 + ; check: umulhi v0, v2 + ; check: v4 = ushr_imm v3, 3 + ; check: v1 -> v4 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_udiv32_p641(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 641 + ; check: iconst.i32 0x0066_3d81 + ; check: v3 = umulhi v0, v2 + ; check: v1 -> v3 + return v1 +} + + +; -------- S32 -------- + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_sdiv32_n6(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -6 + ; check: iconst.i32 0xffff_ffff_d555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv32_n5(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -5 + ; check: iconst.i32 0xffff_ffff_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 31 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_sdiv32_n3(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -3 + ; check: iconst.i32 0x5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 31 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_sdiv32_p6(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 6 + ; check: iconst.i32 0x2aaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_sdiv32_p7(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 7 + ; check: iconst.i32 0xffff_ffff_9249_2493 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 2 + ;
check: ushr_imm v5, 31 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv32_p625(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 625 + ; check: iconst.i32 0x68db_8bad + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 8 + ; check: ushr_imm v4, 31 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + + +; -------- U64 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_udiv64_p7(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 7 + ; check: iconst.i64 0x2492_4924_9249_2493 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 2 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift) +function %t_udiv64_p9(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 9 + ; check: iconst.i64 0xe38e_38e3_8e38_e38f + ; check: umulhi v0, v2 + ; check: v4 = ushr_imm v3, 3 + ; check: v1 -> v4 + return v1 +} + +; complex case (mul, sub, shift, add, shift) +function %t_udiv64_p125(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 125 + ; check: iconst.i64 0x0624_dd2f_1a9f_be77 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_udiv64_p274177(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 274177 + ; check: iconst.i64 0x3d30_f19c_d101 + ; check: v3 = umulhi v0, v2 + ; check: v1 -> v3 + return v1 +} + + +; -------- S64 -------- + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv64_n625(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -625 + ; check: iconst.i64 0xcb92_3a29_c779_a6b5 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_n6(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -6 + ; check: iconst.i64 0xd555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_n5(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -5 + ; check: iconst.i64 0x9999_9999_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_sdiv64_n3(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -3 + ; check: iconst.i64 0x5555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 63 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_p6(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 6 + ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_sdiv64_p15(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 15 + ; check: iconst.i64 0x8888_8888_8888_8889 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 63 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv64_p625(i64) -> i64 { 
+block0(v0: i64): + v1 = sdiv_imm v0, 625 + ; check: iconst.i64 0x346d_c5d6_3886_594b + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} diff --git a/cranelift/filetests/filetests/peepmatic/div_by_const_power_of_2.clif b/cranelift/filetests/filetests/peepmatic/div_by_const_power_of_2.clif new file mode 100644 index 0000000000..a2110a5a75 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/div_by_const_power_of_2.clif @@ -0,0 +1,292 @@ +test peepmatic +target i686 baseline + +; -------- U32 -------- + +; ignored +function %t_udiv32_p0(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 0 + ; check: udiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_udiv32_p1(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 1 + ; check: nop + return v1 +} + +; shift +function %t_udiv32_p2(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 2 + ; check: ushr_imm v0, 1 + return v1 +} + +; shift +function %t_udiv32_p2p31(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 0x8000_0000 + ; check: ushr_imm v0, 31 + return v1 +} + + +; -------- U64 -------- + +; ignored +function %t_udiv64_p0(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 0 + ; check: udiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_udiv64_p1(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 1 + ; check: nop + return v1 +} + +; shift +function %t_udiv64_p2(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 2 + ; check: ushr_imm v0, 1 + return v1 +} + +; shift +function %t_udiv64_p2p63(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 0x8000_0000_0000_0000 + ; check: ushr_imm v0, 63 + return v1 +} + + +; -------- S32 -------- + +; ignored +function %t_sdiv32_p0(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 0 + ; check: sdiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_sdiv32_p1(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 1 + ; check: nop + return v1 +} + +; ignored +function %t_sdiv32_n1(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -1 + ; check: sdiv_imm v0, -1 + return v1 +} + +; shift +function %t_sdiv32_p2(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: v1 -> v4 + return v1 +} + +; shift +function %t_sdiv32_n2(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: irsub_imm v4, 0 + return v1 +} + +; shift +function %t_sdiv32_p4(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 4 + ; check: v2 = sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 2 + ; check: v1 -> v5 + + return v1 +} + +; shift +function %t_sdiv32_n4(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: irsub_imm v5, 0 + return v1 +} + +; shift +function %t_sdiv32_p2p30(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 30 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv32_n2p30(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 30 + ; check: irsub_imm v5, 0 + return v1 +} + +; there's no positive version of this, since 
-(-0x8000_0000) isn't +; representable. +function %t_sdiv32_n2p31(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -0x8000_0000 + ; check: sshr_imm v0, 30 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 31 + ; check: irsub_imm v5, 0 + return v1 +} + + +; -------- S64 -------- + +; ignored +function %t_sdiv64_p0(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 0 + ; check: sdiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_sdiv64_p1(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 1 + ; check: nop + return v1 +} + +; ignored +function %t_sdiv64_n1(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -1 + ; check: sdiv_imm v0, -1 + return v1 +} + +; shift +function %t_sdiv64_p2(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: v4 = sshr_imm v3, 1 + ; check: v1 -> v4 + return v1 +} + +; shift +function %t_sdiv64_n2(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: irsub_imm v4, 0 + return v1 +} + +; shift +function %t_sdiv64_p4(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 2 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv64_n4(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: irsub_imm v5, 0 + return v1 +} + +; shift +function %t_sdiv64_p2p62(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 62 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv64_n2p62(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 62 + ; check: irsub_imm v5, 0 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't +; representable. 
+function %t_sdiv64_n2p63(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -0x8000_0000_0000_0000 + ; check: sshr_imm v0, 62 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 63 + ; check: irsub_imm v5, 0 + return v1 +} diff --git a/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif b/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif index ceefd5bd1c..cc24167267 100644 --- a/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif +++ b/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif @@ -1,4 +1,4 @@ -test simple_preopt +test peepmatic target x86_64 ;; This file used to trigger assertions where we would keep trying to diff --git a/cranelift/filetests/filetests/peepmatic/do_not_reorder_instructions_when_transplanting.clif b/cranelift/filetests/filetests/peepmatic/do_not_reorder_instructions_when_transplanting.clif new file mode 100644 index 0000000000..7fc95f0fdb --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/do_not_reorder_instructions_when_transplanting.clif @@ -0,0 +1,22 @@ +test peepmatic +target x86_64 + +;; Test that although v5 can be replaced with v1, we don't transplant `load.i32 +;; v0` on top of `iadd v3, v4`, because that would move the load past other uses +;; of its result. + +function %foo(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0 + v2 = iconst.i32 16 + v3 = iadd_imm v1, -16 + v4 = iconst.i32 16 + v5 = iadd v3, v4 + ; check: v1 = load.i32 v0 + ; nextln: v5 -> v1 + ; nextln: v2 = iconst.i32 16 + ; nextln: v3 = iadd_imm v1, -16 + ; nextln: v4 = iconst.i32 16 + ; nextln: nop + return v5 +} diff --git a/cranelift/filetests/filetests/peepmatic/fold-extended-move-wraparound.clif b/cranelift/filetests/filetests/peepmatic/fold-extended-move-wraparound.clif new file mode 100644 index 0000000000..e48b91a4b1 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/fold-extended-move-wraparound.clif @@ -0,0 +1,14 @@ +test peepmatic +target x86_64 + +function %wraparound(i64 vmctx) -> f32 system_v { + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, 48 + +block35(v0: i64): + v88 = iconst.i64 0 + v89 = iconst.i64 0x8000_0000_0000_0000 + v90 = ishl_imm v88, 0x8000_0000_0000_0000 + v91 = sshr v90, v89; check: sshr_imm v90, 0x8000_0000_0000_0000 + trap user0 +} diff --git a/cranelift/filetests/filetests/peepmatic/rem_by_const_non_power_of_2.clif b/cranelift/filetests/filetests/peepmatic/rem_by_const_non_power_of_2.clif new file mode 100644 index 0000000000..7df5baf4e3 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/rem_by_const_non_power_of_2.clif @@ -0,0 +1,285 @@ +test peepmatic +target i686 baseline + +; -------- U32 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_urem32_p7(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift) +function %t_urem32_p125(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 125 + ; check: iconst.i32 0x1062_4dd3 + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: imul_imm v4, 125 + ; check: isub v0, v5 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_urem32_p641(i32) -> i32 { 
+block0(v0: i32): + v1 = urem_imm v0, 641 + ; check: iconst.i32 0x0066_3d81 + ; check: umulhi v0, v2 + ; check: imul_imm v3, 641 + ; check: isub v0, v4 + return v1 +} + + +; -------- S32 -------- + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_srem32_n6(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -6 + ; check: iconst.i32 0xffff_ffff_d555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: imul_imm v5, -6 + ; check: isub v0, v6 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem32_n5(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -5 + ; check: iconst.i32 0xffff_ffff_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: imul_imm v6, -5 + ; check: isub v0, v7 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_srem32_n3(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -3 + ; check: iconst.i32 0x5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: imul_imm v7, -3 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_srem32_p6(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 6 + ; check: iconst.i32 0x2aaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: imul_imm v5, 6 + ; check: isub v0, v6 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_srem32_p7(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 7 + ; check: iconst.i32 0xffff_ffff_9249_2493 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 2 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem32_p625(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 625 + ; check: iconst.i32 0x68db_8bad + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 8 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: imul_imm v6, 625 + ; check: isub v0, v7 + return v1 +} + + +; -------- U64 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_urem64_p7(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 7 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift) +function %t_urem64_p9(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 9 + ; check: iconst.i64 0xe38e_38e3_8e38_e38f + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: imul_imm v4, 9 + ; check: isub v0, v5 + return v1 +} + +; complex case (mul, sub, shift, add, shift) +function %t_urem64_p125(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 125 + ; check: iconst.i64 0x0624_dd2f_1a9f_be77 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 6 + ; check: imul_imm v7, 125 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_urem64_p274177(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 274177 + ; check: iconst.i64 0x3d30_f19c_d101 + ; check: umulhi v0, v2 + ; check: imul_imm v3, 0x0004_2f01 + ; check: isub v0, v4 + return v1 +} + + +; -------- S64 -------- + +; simple case (mul, shift, add-sign-bit) +function %t_srem64_n625(i64) -> i64 {
+block0(v0: i64): + v1 = srem_imm v0, -625 + ; check: iconst.i64 0xcb92_3a29_c779_a6b5 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, -625 + ; check: isub v0, v7 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_n6(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -6 + ; check: iconst.i64 0xd555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: imul_imm v5, -6 + ; check: isub v0, v6 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_n5(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -5 + ; check: iconst.i64 0x9999_9999_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, -5 + ; check: isub v0, v7 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_srem64_n3(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -3 + ; check: iconst.i64 0x5555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: imul_imm v7, -3 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_p6(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 6 + ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: imul_imm v5, 6 + ; check: isub v0, v6 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_srem64_p15(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 15 + ; check: iconst.i64 0x8888_8888_8888_8889 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: imul_imm v7, 15 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem64_p625(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 625 + ; check: iconst.i64 0x346d_c5d6_3886_594b + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, 625 + ; check: isub v0, v7 + return v1 +} diff --git a/cranelift/filetests/filetests/peepmatic/rem_by_const_power_of_2.clif b/cranelift/filetests/filetests/peepmatic/rem_by_const_power_of_2.clif new file mode 100644 index 0000000000..c795b73c19 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/rem_by_const_power_of_2.clif @@ -0,0 +1,291 @@ +test peepmatic +target i686 baseline + +; -------- U32 -------- + +; ignored +function %t_urem32_p0(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 0 + ; check: urem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_urem32_p1(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 1 + ; check: iconst.i32 0 + return v1 +} + +; shift +function %t_urem32_p2(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 2 + ; check: band_imm v0, 1 + return v1 +} + +; shift +function %t_urem32_p2p31(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 0x8000_0000 + ; check: band_imm v0, 0x7fff_ffff + return v1 +} + + +; -------- U64 -------- + +; ignored +function %t_urem64_p0(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 0 + ; check: urem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_urem64_p1(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 1 + ; check: iconst.i64 0 + return v1 +} + +; shift +function 
%t_urem64_p2(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 2 + ; check: band_imm v0, 1 + return v1 +} + +; shift +function %t_urem64_p2p63(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 0x8000_0000_0000_0000 + ; check: band_imm v0, 0x7fff_ffff_ffff_ffff + return v1 +} + + +; -------- S32 -------- + +; ignored +function %t_srem32_n1(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -1 + ; check: srem_imm v0, -1 + return v1 +} + +; ignored +function %t_srem32_p0(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 0 + ; check: srem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_srem32_p1(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 1 + ; check: iconst.i32 0 + return v1 +} + +; shift +function %t_srem32_p2(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem32_n2(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem32_p4(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_n4(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_p2p30(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_c000_0000 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_n2p30(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_c000_0000 + ; check: isub v0, v5 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000) isn't +; representable. 
+function %t_srem32_n2p31(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -0x8000_0000 + ; check: sshr_imm v0, 30 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_8000_0000 + ; check: isub v0, v5 + return v1 +} + + +; -------- S64 -------- + +; ignored +function %t_srem64_n1(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -1 + ; check: srem_imm v0, -1 + return v1 +} + +; ignored +function %t_srem64_p0(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 0 + ; check: srem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_srem64_p1(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 1 + ; check: iconst.i64 0 + return v1 +} + +; shift +function %t_srem64_p2(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem64_n2(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem64_p4(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_n4(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_p2p62(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xc000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_n2p62(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xc000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't +; representable. 
+function %t_srem64_n2p63(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -0x8000_0000_0000_0000 + ; check: sshr_imm v0, 62 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: band_imm v4, 0x8000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} diff --git a/cranelift/filetests/filetests/peepmatic/replace_branching_instructions_and_cfg_predecessors.clif b/cranelift/filetests/filetests/peepmatic/replace_branching_instructions_and_cfg_predecessors.clif new file mode 100644 index 0000000000..17ca472b7e --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/replace_branching_instructions_and_cfg_predecessors.clif @@ -0,0 +1,22 @@ +test peepmatic +target x86_64 + +function u0:2(i64 , i64) { + gv1 = load.i64 notrap aligned gv0 + heap0 = static gv1 + block0(v0: i64, v1: i64): + v16 = iconst.i32 6 + v17 = heap_addr.i64 heap0, v16, 1 + v18 = load.i32 v17 + v19 = iconst.i32 4 + v20 = icmp ne v18, v19 + v21 = bint.i32 v20 + brnz v21, block2 + jump block4 + block4: + jump block1 + block2: + jump block1 + block1: + return +} diff --git a/cranelift/filetests/filetests/peepmatic/simplify32.clif b/cranelift/filetests/filetests/peepmatic/simplify32.clif new file mode 100644 index 0000000000..b1c6786a05 --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/simplify32.clif @@ -0,0 +1,60 @@ +test peepmatic +target i686 + +;; 32-bits platforms. + +function %iadd_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, 2 +; nextln: return v2 +; nextln: } + +function %isub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v0, v1 + return v2 +} +; sameln: function %isub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, -2 +; nextln: return v2 +; nextln: } + +function %icmp_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = icmp slt v0, v1 + v3 = bint.i32 v2 + return v3 +} +; sameln: function %icmp_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = icmp_imm slt v0, 2 +; nextln: v3 = bint.i32 v2 +; nextln: return v3 +; nextln: } + +;; Don't simplify operations that would get illegal because of lack of native +;; support. +function %iadd_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i64): +; nextln: v1 = iconst.i64 2 +; nextln: v2 = iadd v0, v1 +; nextln: return v2 +; nextln: } diff --git a/cranelift/filetests/filetests/peepmatic/simplify64.clif b/cranelift/filetests/filetests/peepmatic/simplify64.clif new file mode 100644 index 0000000000..93c289ccdd --- /dev/null +++ b/cranelift/filetests/filetests/peepmatic/simplify64.clif @@ -0,0 +1,326 @@ +test peepmatic +target x86_64 + +;; 64-bits platforms. 
+ +function %iadd_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, 2 +; nextln: return v2 +; nextln: } + +function %isub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v0, v1 + return v2 +} +; sameln: function %isub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, -2 +; nextln: return v2 +; nextln: } + +function %icmp_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = icmp slt v0, v1 + v3 = bint.i32 v2 + return v3 +} +; sameln: function %icmp_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = icmp_imm slt v0, 2 +; nextln: v3 = bint.i32 v2 +; nextln: return v3 +; nextln: } + +function %ifcmp_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = ifcmp v0, v1 + brif eq v2, block1 + jump block2 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} +; sameln: function %ifcmp_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = ifcmp_imm v0, 2 +; nextln: brif eq v2, block1 +; nextln: jump block2 +; nextln: +; nextln: block1: +; nextln: v3 = iconst.i32 1 +; nextln: return v3 +; nextln: +; nextln: block2: +; nextln: v4 = iconst.i32 2 +; nextln: return v4 +; nextln: } + +function %brz_bint(i32) { +block0(v0: i32): + v3 = icmp_imm slt v0, 0 + v1 = bint.i32 v3 + v2 = select v1, v1, v1 + trapz v1, user0 + brz v1, block1 + jump block2 + +block1: + return + +block2: + return +} +; sameln: function %brz_bint +; nextln: (v0: i32): +; nextln: v3 = icmp_imm slt v0, 0 +; nextln: v1 = bint.i32 v3 +; nextln: v2 = select v3, v1, v1 +; nextln: trapz v3, user0 +; nextln: brnz v3, block2 +; nextln: jump block1 + +function %irsub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v1, v0 + return v2 +} +; sameln: function %irsub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = irsub_imm v0, 2 +; nextln: return v2 +; nextln: } + +;; Sign-extensions. 
+ +;; 8 -> 16 +function %uextend_8_16() -> i16 { +block0: + v0 = iconst.i16 37 + v1 = ishl_imm v0, 8 + v2 = ushr_imm v1, 8 + return v2 +} +; sameln: function %uextend_8_16 +; nextln: block0: +; nextln: v0 = iconst.i16 37 +; nextln: v1 = ishl_imm v0, 8 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i16 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_16() -> i16 { +block0: + v0 = iconst.i16 37 + v1 = ishl_imm v0, 8 + v2 = sshr_imm v1, 8 + return v2 +} +; sameln: function %sextend_8_16 +; nextln: block0: +; nextln: v0 = iconst.i16 37 +; nextln: v1 = ishl_imm v0, 8 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i16 v3 +; nextln: return v2 +; nextln: } + +;; 8 -> 32 +function %uextend_8_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 24 + v2 = ushr_imm v1, 24 + return v2 +} +; sameln: function %uextend_8_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 24 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i32 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 24 + v2 = sshr_imm v1, 24 + return v2 +} +; sameln: function %sextend_8_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 24 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i32 v3 +; nextln: return v2 +; nextln: } + +;; 16 -> 32 +function %uextend_16_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 16 + v2 = ushr_imm v1, 16 + return v2 +} +; sameln: function %uextend_16_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 16 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = uextend.i32 v3 +; nextln: return v2 +; nextln: } + +function %sextend_16_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 16 + v2 = sshr_imm v1, 16 + return v2 +} +; sameln: function %sextend_16_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 16 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = sextend.i32 v3 +; nextln: return v2 +; nextln: } + +;; 8 -> 64 +function %uextend_8_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 56 + v2 = ushr_imm v1, 56 + return v2 +} +; sameln: function %uextend_8_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 56 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 56 + v2 = sshr_imm v1, 56 + return v2 +} +; sameln: function %sextend_8_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 56 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +;; 16 -> 64 +function %uextend_16_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 48 + v2 = ushr_imm v1, 48 + return v2 +} +; sameln: function %uextend_16_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 48 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_16_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 48 + v2 = sshr_imm v1, 48 + return v2 +} +; sameln: function %sextend_16_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 48 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +;; 32 -> 64 +function %uextend_32_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 32 + 
+;; 32 -> 64
+function %uextend_32_64() -> i64 {
+block0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 32
+    v2 = ushr_imm v1, 32
+    return v2
+}
+; sameln: function %uextend_32_64
+; nextln: block0:
+; nextln: v0 = iconst.i64 37
+; nextln: v1 = ishl_imm v0, 32
+; nextln: v3 = ireduce.i32 v0
+; nextln: v2 = uextend.i64 v3
+; nextln: return v2
+; nextln: }
+
+function %sextend_32_64() -> i64 {
+block0:
+    v0 = iconst.i64 37
+    v1 = ishl_imm v0, 32
+    v2 = sshr_imm v1, 32
+    return v2
+}
+; sameln: function %sextend_32_64
+; nextln: block0:
+; nextln: v0 = iconst.i64 37
+; nextln: v1 = ishl_imm v0, 32
+; nextln: v3 = ireduce.i32 v0
+; nextln: v2 = sextend.i64 v3
+; nextln: return v2
+; nextln: }
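+
+;; The test below checks folding of offsetting immediates: adding 42 and then
+;; -42 cancels out, so the second `iadd_imm` becomes a `nop` and its result
+;; `v2` is aliased to the original `v0`; the now-unused `v1` is left behind,
+;; presumably for a later dead-code pass to remove.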
+function %add_imm_fold(i32) -> i32 {
+block0(v0: i32):
+    v1 = iadd_imm v0, 42
+    v2 = iadd_imm v1, -42
+    return v2
+}
+; sameln: function %add_imm_fold(i32)
+; nextln: block0(v0: i32):
+; nextln: v2 -> v0
+; nextln: v1 = iadd_imm v0, 42
+; nextln: nop
+; nextln: return v2
diff --git a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
new file mode 100644
index 0000000000..3000369bb5
--- /dev/null
+++ b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
@@ -0,0 +1,17 @@
+test peepmatic
+target x86_64
+
+;; The `isub` is a no-op, but we can't replace the whole `isub` instruction
+;; with the instruction that defines its `v2` operand, because `v2` is only
+;; one of that instruction's results. Instead, we make an alias `v3 -> v2`.
+
+function %replace_inst_with_alias() -> i32 {
+block0:
+    v0 = iconst.i32 0
+    v1, v2 = x86_smulx v0, v0
+    v3 = isub v2, v0
+    ; check: v0 = iconst.i32 0
+    ; nextln: v1, v2 = x86_smulx v0, v0
+    ; nextln: v3 -> v2
+    return v3
+}
diff --git a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
index 4a4b7a80b6..101e4eb201 100644
--- a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
+++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
@@ -7,13 +7,14 @@ function %indir_udiv32(i32) -> i32 {
 block0(v0: i32):
     v1 = iconst.i32 7
     v2 = udiv v0, v1
-    ; check: v4 = iconst.i32 0x2492_4925
-    ; nextln: v5 = umulhi v0, v4
-    ; nextln: v6 = isub v0, v5
-    ; nextln: v7 = ushr_imm v6, 1
-    ; nextln: v8 = iadd v7, v5
-    ; nextln: v9 = ushr_imm v8, 2
-    ; nextln: v2 -> v9
+    ; check: iconst.i32 7
+    ; check: iconst.i32 0x2492_4925
+    ; check: umulhi v0, v3
+    ; check: isub v0, v4
+    ; check: ushr_imm v5, 1
+    ; check: iadd v6, v4
+    ; check: v8 = ushr_imm v7, 2
+    ; check: v2 -> v8
     return v2
 }
 
@@ -21,12 +22,13 @@ function %indir_sdiv32(i32) -> i32 {
 block0(v0: i32):
     v1 = iconst.i32 -17
     v2 = sdiv v0, v1
-    ; check: v4 = iconst.i32 0xffff_ffff_8787_8787
-    ; nextln: v5 = smulhi v0, v4
-    ; nextln: v6 = sshr_imm v5, 3
-    ; nextln: v7 = ushr_imm v6, 31
-    ; nextln: v8 = iadd v6, v7
-    ; nextln: v2 -> v8
+    ; check: iconst.i32 -17
+    ; check: iconst.i32 0xffff_ffff_8787_8787
+    ; check: smulhi v0, v3
+    ; check: sshr_imm v4, 3
+    ; check: ushr_imm v5, 31
+    ; check: v7 = iadd v5, v6
+    ; check: v2 -> v7
     return v2
 }
 
@@ -34,10 +36,11 @@ function %indir_udiv64(i64) -> i64 {
 block0(v0: i64):
     v1 = iconst.i64 1337
     v2 = udiv v0, v1
-    ; check: v4 = iconst.i64 0xc411_9d95_2866_a139
-    ; nextln: v5 = umulhi v0, v4
-    ; nextln: v6 = ushr_imm v5, 10
-    ; nextln: v2 -> v6
+    ; check: iconst.i64 1337
+    ; check: iconst.i64 0xc411_9d95_2866_a139
+    ; check: umulhi v0, v3
+    ; check: v5 = ushr_imm v4, 10
+    ; check: v2 -> v5
     return v2
 }
 
@@ -45,11 +48,12 @@ function %indir_sdiv64(i64) -> i64 {
 block0(v0: i64):
     v1 = iconst.i64 -90210
     v2 = sdiv v0, v1
-    ; check: v4 = iconst.i64 0xd181_4ee8_939c_b8bb
-    ; nextln: v5 = smulhi v0, v4
-    ; nextln: v6 = sshr_imm v5, 14
-    ; nextln: v7 = ushr_imm v6, 63
-    ; nextln: v8 = iadd v6, v7
-    ; nextln: v2 -> v8
+    ; check: iconst.i64 0xffff_ffff_fffe_9f9e
+    ; check: iconst.i64 0xd181_4ee8_939c_b8bb
+    ; check: smulhi v0, v3
+    ; check: sshr_imm v4, 14
+    ; check: ushr_imm v5, 63
+    ; check: v7 = iadd v5, v6
+    ; check: v2 -> v7
     return v2
 }
diff --git a/cranelift/filetests/filetests/simple_preopt/simplify32.clif b/cranelift/filetests/filetests/simple_preopt/simplify32.clif
index cf238fb5ed..2582fd69aa 100644
--- a/cranelift/filetests/filetests/simple_preopt/simplify32.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify32.clif
@@ -58,3 +58,4 @@ block0(v0: i64):
 ; nextln: v2 = iadd v0, v1
 ; nextln: return v2
 ; nextln: }
+
diff --git a/cranelift/filetests/filetests/simple_preopt/simplify64.clif b/cranelift/filetests/filetests/simple_preopt/simplify64.clif
index 6489c3bd1e..4ceabdc335 100644
--- a/cranelift/filetests/filetests/simple_preopt/simplify64.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify64.clif
@@ -44,37 +44,6 @@ block0(v0: i32):
 ; nextln: return v3
 ; nextln: }
 
-function %ifcmp_imm(i32) -> i32 {
-block0(v0: i32):
-    v1 = iconst.i32 2
-    v2 = ifcmp v0, v1
-    brif eq v2, block1
-    jump block2
-
-block1:
-    v3 = iconst.i32 1
-    return v3
-
-block2:
-    v4 = iconst.i32 2
-    return v4
-}
-; sameln: function %ifcmp_imm
-; nextln: block0(v0: i32):
-; nextln: v1 = iconst.i32 2
-; nextln: v2 = ifcmp_imm v0, 2
-; nextln: brif eq v2, block1
-; nextln: jump block2
-; nextln:
-; nextln: block1:
-; nextln: v3 = iconst.i32 1
-; nextln: return v3
-; nextln:
-; nextln: block2:
-; nextln: v4 = iconst.i32 2
-; nextln: return v4
-; nextln: }
-
 function %brz_bint(i32) {
 block0(v0: i32):
     v3 = icmp_imm slt v0, 0
diff --git a/cranelift/filetests/src/lib.rs b/cranelift/filetests/src/lib.rs
index bc1a6df1e2..5cf7331225 100644
--- a/cranelift/filetests/src/lib.rs
+++ b/cranelift/filetests/src/lib.rs
@@ -45,6 +45,7 @@ mod test_domtree;
 mod test_interpret;
 mod test_legalizer;
 mod test_licm;
+mod test_peepmatic;
 mod test_postopt;
 mod test_preopt;
 mod test_print_cfg;
@@ -128,6 +129,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::SubtestResult<Box<dyn SubTest>
         "interpret" => test_interpret::subtest(parsed),
         "legalizer" => test_legalizer::subtest(parsed),
         "licm" => test_licm::subtest(parsed),
+        "peepmatic" => test_peepmatic::subtest(parsed),
         "postopt" => test_postopt::subtest(parsed),
         "preopt" => test_preopt::subtest(parsed),
         "print-cfg" => test_print_cfg::subtest(parsed),
diff --git a/cranelift/filetests/src/test_peepmatic.rs b/cranelift/filetests/src/test_peepmatic.rs
new file mode 100644
index 0000000000..fc701c7046
--- /dev/null
+++ b/cranelift/filetests/src/test_peepmatic.rs
@@ -0,0 +1,65 @@
+//! Test command for `peepmatic`-generated peephole optimizers.
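+//!
+//! A `.clif` file opts into this runner with a header like the one in
+//! `simplify_instruction_into_alias_of_value.clif` above; the `target`
+//! line matters because this subtest requires an ISA:
+//!
+//! ```text
+//! test peepmatic
+//! target x86_64
+//! ```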
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestPreopt;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "peepmatic");
+    if parsed.options.is_empty() {
+        Ok(Box::new(TestPreopt))
+    } else {
+        Err(format!("No options allowed on {}", parsed))
+    }
+}
+
+impl SubTest for TestPreopt {
+    fn name(&self) -> &'static str {
+        "peepmatic"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+        let isa = context.isa.expect("peepmatic needs an ISA");
+
+        comp_ctx.compute_cfg();
+        comp_ctx
+            .preopt(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+        let text = &comp_ctx.func.display(isa).to_string();
+        log::debug!("After peepmatic-based simple_preopt:\n{}", text);
+
+        // Only actually run the filecheck if peepmatic is enabled, because it
+        // can generate slightly different code (alias a result vs. replace an
+        // instruction) than the non-peepmatic versions of these peephole
+        // optimizations. Note that the non-`peepmatic` results can be tested
+        // with the `test simple_preopt` subtest.
+        if cfg!(feature = "enable-peepmatic") {
+            run_filecheck(&text, context)
+        } else {
+            Ok(())
+        }
+    }
+}
diff --git a/cranelift/filetests/src/test_simple_preopt.rs b/cranelift/filetests/src/test_simple_preopt.rs
index 1463d1c69a..f6cdec391f 100644
--- a/cranelift/filetests/src/test_simple_preopt.rs
+++ b/cranelift/filetests/src/test_simple_preopt.rs
@@ -39,6 +39,16 @@ impl SubTest for TestSimplePreopt {
             .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
         let text = &comp_ctx.func.display(isa).to_string();
         log::debug!("After simple_preopt:\n{}", text);
-        run_filecheck(&text, context)
+
+        // Only actually run the filecheck if peepmatic is *not* enabled,
+        // because it can generate slightly different code (alias a result vs.
+        // replace an instruction) than the non-peepmatic versions of these
+        // peephole optimizations. Note that the `peepmatic`-based results can
+        // be tested with the `test peepmatic` subtest.
+        if cfg!(feature = "enable-peepmatic") {
+            Ok(())
+        } else {
+            run_filecheck(&text, context)
+        }
     }
 }
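
Note on the `div_by_const_indirect.clif` expectations above: the checked
sequences are the standard round-up multiplicative-inverse ("magic number")
expansions of division by a constant. Here is a minimal stand-alone sketch of
the `udiv`-by-7 case (the helper name is invented for illustration; only the
constant and the operation sequence come from the test), using
`0x2492_4925 == ceil(2^32 / 7)`:

fn udiv7_via_magic(n: u32) -> u32 {
    // ceil(2^32 / 7); matches `; check: iconst.i32 0x2492_4925`.
    const M: u64 = 0x2492_4925;
    let q = ((n as u64 * M) >> 32) as u32; // umulhi v0, v3
    let t = (n - q) >> 1;                  // ushr_imm (isub v0, v4), 1
    (t + q) >> 2                           // ushr_imm (iadd v6, v4), 2
}

fn main() {
    // Spot-check the identity against plain hardware division.
    for n in [0u32, 1, 6, 7, 8, 1000, u32::MAX] {
        assert_eq!(udiv7_via_magic(n), n / 7);
    }
}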