x64: Migrate {s,u}{div,rem} to ISLE (#6008)

* x64: Add precise-output tests for div traps This adds a suite of `*.clif` files which are intended to test the `avoid_div_traps=true` compilation of the `{s,u}{div,rem}` instructions. * x64: Remove conditional regalloc in `Div` instruction Move the 8-bit `Div` logic into a dedicated `Div8` instruction to avoid having conditionally-used registers with respect to regalloc. * x64: Migrate non-trapping, `udiv`/`urem` to ISLE * x64: Port checked `udiv` to ISLE * x64: Migrate urem entirely to ISLE * x64: Use `test` instead of `cmp` to compare-to-zero * x64: Port `sdiv` lowering to ISLE * x64: Port `srem` lowering to ISLE * Tidy up regalloc behavior and fix tests * Update docs and winch * Review comments * Reword again * More refactoring test fixes * More test fixes
2023-03-13 20:44:06 -05:00
parent 188f712025
commit 5c1b468648
52 changed files with 2178 additions and 835 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -957,13 +957,10 @@
            (result Reg (a64_sdiv $I64 valid_x64 y64)))
        result))

-;; Helper for extracting an immediate that's not 0 and not -1 from an imm64.
-(decl safe_divisor_from_imm64 (u64) Imm64)
-(extern extractor safe_divisor_from_imm64 safe_divisor_from_imm64)
-
 ;; Special case for `sdiv` where no checks are needed due to division by a
 ;; constant meaning the checks are always passed.
-(rule 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst (safe_divisor_from_imm64 y)))))
+(rule 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst imm))))
+      (if-let y (safe_divisor_from_imm64 ty imm))
      (a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty (ImmExtend.Sign) y)))

 ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -392,13 +392,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
        writable_zero_reg()
    }

-    fn safe_divisor_from_imm64(&mut self, val: Imm64) -> Option<u64> {
-        match val.bits() {
-            0 | -1 => None,
-            n => Some(n as u64),
-        }
-    }
-
    fn shift_mask(&mut self, ty: Type) -> ImmLogic {
        debug_assert!(ty.lane_bits().is_power_of_two());

--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -58,14 +58,23 @@
            (dst WritableGpr))

       ;; Integer quotient and remainder: (div idiv) $rax $rdx (reg addr)
-       (Div (size OperandSize) ;; 1, 2, 4, or 8
-            (signed bool)
+       ;;
+       ;; Note that this isn't used for 8-bit division which has its own `Div8`
+       ;; instruction.
+       (Div (size OperandSize) ;; 2, 4, or 8
+            (sign DivSignedness)
            (divisor GprMem)
            (dividend_lo Gpr)
            (dividend_hi Gpr)
            (dst_quotient WritableGpr)
            (dst_remainder WritableGpr))

+       ;; Same as `Div`, but for 8-bit division, where the regalloc behavior is different
+       (Div8 (sign DivSignedness)
+             (divisor GprMem)
+             (dividend Gpr)
+             (dst WritableGpr))
+
       ;; The high (and low) bits of a (un)signed multiply: `RDX:RAX := RAX *
       ;; rhs`.
       (MulHi (size OperandSize)
@@ -75,19 +84,47 @@
              (dst_lo WritableGpr)
              (dst_hi WritableGpr))

-       ;; A synthetic sequence to implement the right inline checks for
-       ;; remainder and division, assuming the dividend is in %rax.
+       ;; A synthetic instruction sequence used as part of the lowering of the
+       ;; `srem` instruction which returns 0 if the divisor is -1 and
+       ;; otherwise executes an `idiv` instruction.
       ;;
-       ;; The generated code sequence is described in the emit's function match
-       ;; arm for this instruction.
-       (CheckedDivOrRemSeq (kind DivOrRemKind)
-                           (size OperandSize)
-                           (dividend_lo Gpr)
-                           (dividend_hi Gpr)
-                           (divisor Gpr)
-                           (dst_quotient WritableGpr)
-                           (dst_remainder WritableGpr)
-                           (tmp OptionWritableGpr))
+       ;; Note that this does not check for 0 as that's expected to be done
+       ;; separately. Also note that 8-bit types don't use this and use
+       ;; `CheckedSRemSeq8` instead.
+       (CheckedSRemSeq (size OperandSize)
+                       (dividend_lo Gpr)
+                       (dividend_hi Gpr)
+                       (divisor Gpr)
+                       (dst_quotient WritableGpr)
+                       (dst_remainder WritableGpr))
+
+       ;; Same as above but for 8-bit types.
+       (CheckedSRemSeq8 (dividend Gpr)
+                        (divisor Gpr)
+                        (dst WritableGpr))
+
+       ;; Validates that the `divisor` can be safely divided into the
+       ;; `dividend`.
+       ;;
+       ;; This is a separate pseudo-instruction because it contains internal
+       ;; jumps that can't currently be modeled with ordinary instructions. It
+       ;; will trap if the `divisor` is zero, or if the `divisor` is -1 and the
+       ;; `dividend` is INT_MIN for the associated type.
+       ;;
+       ;; Note that 64-bit types must use `ValidateSdivDivisor64`.
+       (ValidateSdivDivisor (size OperandSize)
+                            (dividend Gpr)
+                            (divisor Gpr))
+
+       ;; Same as `ValidateSdivDivisor` but for 64-bit types.
+       ;;
+       ;; This is a distinct instruction because the emission in `emit.rs`
+       ;; requires a temporary register to load an immediate into, hence the
+       ;; `tmp` field in this instruction not present in the non-64-bit one.
+       (ValidateSdivDivisor64 (dividend Gpr)
+                              (divisor Gpr)
+                              (tmp WritableGpr))
+

       ;; Do a sign-extend based on the sign of the value in rax into rdx: (cwd
       ;; cdq cqo) or al into ah: (cbw)
@@ -628,6 +665,10 @@
            Size32
            Size64))

+(type DivSignedness
+      (enum Signed
+            Unsigned))
+
 (type FenceKind extern
      (enum MFence
            LFence
@@ -690,12 +731,6 @@
            Tzcnt
            Popcnt))

-(type DivOrRemKind extern
-      (enum SignedDiv
-            UnsignedDiv
-            SignedRem
-            UnsignedRem))
-
 (type SseOpcode extern
      (enum Addps
            Addpd
@@ -4521,15 +4556,70 @@

 ;;;; Division/Remainders ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(decl emit_div_or_rem (DivOrRemKind Type WritableGpr Gpr Gpr) Unit)
-(extern constructor emit_div_or_rem emit_div_or_rem)
+;; Helper for creating `CheckedSRemSeq` instructions.
+(decl x64_checked_srem_seq (OperandSize Gpr Gpr Gpr) ValueRegs)
+(rule (x64_checked_srem_seq size dividend_lo dividend_hi divisor)
+      (let ((dst_quotient WritableGpr (temp_writable_gpr))
+            (dst_remainder WritableGpr (temp_writable_gpr))
+            (_ Unit (emit (MInst.CheckedSRemSeq size dividend_lo dividend_hi divisor dst_quotient dst_remainder))))
+        (value_regs dst_quotient dst_remainder)))

-(decl div_or_rem (DivOrRemKind Value Value) Gpr)
-(rule (div_or_rem kind a @ (value_type ty) b)
+(decl x64_checked_srem_seq8 (Gpr Gpr) Gpr)
+(rule (x64_checked_srem_seq8 dividend divisor)
      (let ((dst WritableGpr (temp_writable_gpr))
-            (_ Unit (emit_div_or_rem kind ty dst a b)))
+            (_ Unit (emit (MInst.CheckedSRemSeq8 dividend divisor dst))))
        dst))

+;; Helper for creating `Div8` instructions
+(decl x64_div8 (Gpr GprMem DivSignedness) Gpr)
+(rule (x64_div8 dividend divisor sign)
+      (let ((dst WritableGpr (temp_writable_gpr))
+            (_ Unit (emit (MInst.Div8 sign divisor dividend dst))))
+        dst))
+
+;; Helper for creating `Div` instructions
+;;
+;; Two registers are returned through `ValueRegs` where the first is the
+;; quotient and the second is the remainder.
+(decl x64_div (Gpr Gpr GprMem OperandSize DivSignedness) ValueRegs)
+(rule (x64_div dividend_lo dividend_hi divisor size sign)
+      (let ((dst_quotient WritableGpr (temp_writable_gpr))
+            (dst_remainder WritableGpr (temp_writable_gpr))
+            (_ Unit (emit (MInst.Div size sign divisor dividend_lo dividend_hi dst_quotient dst_remainder))))
+        (value_regs dst_quotient dst_remainder)))
+
+;; Helper for `Div`, returning the quotient and discarding the remainder.
+(decl x64_div_quotient (Gpr Gpr GprMem OperandSize DivSignedness) ValueRegs)
+(rule (x64_div_quotient dividend_lo dividend_hi divisor size sign)
+      (value_regs_get (x64_div dividend_lo dividend_hi divisor size sign) 0))
+
+;; Helper for `Div`, returning the remainder and discarding the quotient.
+(decl x64_div_remainder (Gpr Gpr GprMem OperandSize DivSignedness) ValueRegs)
+(rule (x64_div_remainder dividend_lo dividend_hi divisor size sign)
+      (value_regs_get (x64_div dividend_lo dividend_hi divisor size sign) 1))
+
+;; Helper for creating `SignExtendData` instructions
+(decl x64_sign_extend_data (Gpr OperandSize) Gpr)
+(rule (x64_sign_extend_data src size)
+      (let ((dst WritableGpr (temp_writable_gpr))
+            (_ Unit (emit (MInst.SignExtendData size src dst))))
+        dst))
+
+;; Helper for creating `ValidateSdivDivisor` instructions.
+(decl validate_sdiv_divisor (OperandSize Gpr Gpr) Gpr)
+(rule (validate_sdiv_divisor size dividend divisor)
+      (let ((_ Unit (emit (MInst.ValidateSdivDivisor size dividend divisor))))
+        divisor))
+
+;; Helper for creating `ValidateSdivDivisor64` instructions.
+(decl validate_sdiv_divisor64 (Gpr Gpr) Gpr)
+(rule (validate_sdiv_divisor64 dividend divisor)
+      (let (
+          (tmp WritableGpr (temp_writable_gpr))
+          (_ Unit (emit (MInst.ValidateSdivDivisor64 dividend divisor tmp)))
+        )
+        divisor))
+
 ;;;; Pinned Register ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (decl read_pinned_gpr () Gpr)
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -12,6 +12,8 @@ use smallvec::{smallvec, SmallVec};
 use std::fmt;
 use std::string::String;

+pub use crate::isa::x64::lower::isle::generated_code::DivSignedness;
+
 /// An extenstion trait for converting `Writable{Xmm,Gpr}` to `Writable<Reg>`.
 pub trait ToWritableReg {
    /// Convert `Writable{Xmm,Gpr}` to `Writable<Reg>`.
@@ -1878,35 +1880,6 @@ impl fmt::Display for ShiftKind {
    }
 }

-/// What kind of division or remainder instruction this is?
-#[derive(Clone, Eq, PartialEq)]
-pub enum DivOrRemKind {
-    /// Signed division.
-    SignedDiv,
-    /// Unsigned division.
-    UnsignedDiv,
-    /// Signed remainder.
-    SignedRem,
-    /// Unsigned remainder.
-    UnsignedRem,
-}
-
-impl DivOrRemKind {
-    pub(crate) fn is_signed(&self) -> bool {
-        match self {
-            DivOrRemKind::SignedDiv | DivOrRemKind::SignedRem => true,
-            _ => false,
-        }
-    }
-
-    pub(crate) fn is_div(&self) -> bool {
-        match self {
-            DivOrRemKind::SignedDiv | DivOrRemKind::UnsignedDiv => true,
-            _ => false,
-        }
-    }
-}
-
 /// These indicate condition code tests.  Not all are represented since not all are useful in
 /// compiler-generated code.
 #[derive(Copy, Clone)]
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -399,25 +399,36 @@ pub(crate) fn emit(
            emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags)
        }

-        Inst::Div {
-            size,
-            signed,
-            dividend_lo,
-            dividend_hi,
-            divisor,
-            dst_quotient,
-            dst_remainder,
-        } => {
-            let dividend_lo = allocs.next(dividend_lo.to_reg());
-            let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg());
-            debug_assert_eq!(dividend_lo, regs::rax());
-            debug_assert_eq!(dst_quotient, regs::rax());
-            if size.to_bits() > 8 {
-                let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg());
-                debug_assert_eq!(dst_remainder, regs::rdx());
-                let dividend_hi = allocs.next(dividend_hi.to_reg());
-                debug_assert_eq!(dividend_hi, regs::rdx());
-            }
+        Inst::Div { sign, divisor, .. } | Inst::Div8 { sign, divisor, .. } => {
+            let divisor = divisor.clone().to_reg_mem().with_allocs(allocs);
+            let size = match inst {
+                Inst::Div {
+                    size,
+                    dividend_lo,
+                    dividend_hi,
+                    dst_quotient,
+                    dst_remainder,
+                    ..
+                } => {
+                    let dividend_lo = allocs.next(dividend_lo.to_reg());
+                    let dividend_hi = allocs.next(dividend_hi.to_reg());
+                    let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg());
+                    let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg());
+                    debug_assert_eq!(dividend_lo, regs::rax());
+                    debug_assert_eq!(dividend_hi, regs::rdx());
+                    debug_assert_eq!(dst_quotient, regs::rax());
+                    debug_assert_eq!(dst_remainder, regs::rdx());
+                    *size
+                }
+                Inst::Div8 { dividend, dst, .. } => {
+                    let dividend = allocs.next(dividend.to_reg());
+                    let dst = allocs.next(dst.to_reg().to_reg());
+                    debug_assert_eq!(dividend, regs::rax());
+                    debug_assert_eq!(dst, regs::rax());
+                    OperandSize::Size8
+                }
+                _ => unreachable!(),
+            };

            let (opcode, prefix) = match size {
                OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
@@ -428,10 +439,12 @@ pub(crate) fn emit(

            sink.add_trap(TrapCode::IntegerDivisionByZero);

-            let subopcode = if *signed { 7 } else { 6 };
-            match divisor.clone().to_reg_mem() {
+            let subopcode = match sign {
+                DivSignedness::Signed => 7,
+                DivSignedness::Unsigned => 6,
+            };
+            match divisor {
                RegMem::Reg { reg } => {
-                    let reg = allocs.next(reg);
                    let src = int_reg_enc(reg);
                    emit_std_enc_enc(
                        sink,
@@ -440,11 +453,11 @@ pub(crate) fn emit(
                        1,
                        subopcode,
                        src,
-                        RexFlags::from((*size, reg)),
+                        RexFlags::from((size, reg)),
                    )
                }
                RegMem::Mem { addr: src } => {
-                    let amode = src.finalize(state, sink).with_allocs(allocs);
+                    let amode = src.finalize(state, sink);
                    emit_std_enc_mem(
                        sink,
                        prefix,
@@ -452,7 +465,7 @@ pub(crate) fn emit(
                        1,
                        subopcode,
                        &amode,
-                        RexFlags::from(*size),
+                        RexFlags::from(size),
                        0,
                    );
                }
@@ -522,164 +535,149 @@ pub(crate) fn emit(
            }
        }

-        Inst::CheckedDivOrRemSeq {
-            kind,
-            size,
-            dividend_lo,
-            dividend_hi,
-            divisor,
-            tmp,
-            dst_quotient,
-            dst_remainder,
-        } => {
-            let dividend_lo = allocs.next(dividend_lo.to_reg());
-            let dividend_hi = allocs.next(dividend_hi.to_reg());
+        Inst::CheckedSRemSeq { divisor, .. } | Inst::CheckedSRemSeq8 { divisor, .. } => {
            let divisor = allocs.next(divisor.to_reg());
-            let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg());
-            let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg());
-            let tmp = tmp.map(|tmp| allocs.next(tmp.to_reg().to_reg()));
-            debug_assert_eq!(dividend_lo, regs::rax());
-            debug_assert_eq!(dividend_hi, regs::rdx());
-            debug_assert_eq!(dst_quotient, regs::rax());
-            debug_assert_eq!(dst_remainder, regs::rdx());
+
+            // Validate that the register constraints of the dividend and the
+            // destination are all as expected.
+            let (dst, size) = match inst {
+                Inst::CheckedSRemSeq {
+                    dividend_lo,
+                    dividend_hi,
+                    dst_quotient,
+                    dst_remainder,
+                    size,
+                    ..
+                } => {
+                    let dividend_lo = allocs.next(dividend_lo.to_reg());
+                    let dividend_hi = allocs.next(dividend_hi.to_reg());
+                    let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg());
+                    let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg());
+                    debug_assert_eq!(dividend_lo, regs::rax());
+                    debug_assert_eq!(dividend_hi, regs::rdx());
+                    debug_assert_eq!(dst_quotient, regs::rax());
+                    debug_assert_eq!(dst_remainder, regs::rdx());
+                    (regs::rdx(), *size)
+                }
+                Inst::CheckedSRemSeq8 { dividend, dst, .. } => {
+                    let dividend = allocs.next(dividend.to_reg());
+                    let dst = allocs.next(dst.to_reg().to_reg());
+                    debug_assert_eq!(dividend, regs::rax());
+                    debug_assert_eq!(dst, regs::rax());
+                    (regs::rax(), OperandSize::Size8)
+                }
+                _ => unreachable!(),
+            };

            // Generates the following code sequence:
            //
-            // ;; check divide by zero:
-            // cmp 0 %divisor
-            // jnz $after_trap
-            // ud2
-            // $after_trap:
-            //
-            // ;; for signed modulo/div:
            // cmp -1 %divisor
            // jnz $do_op
-            // ;;   for signed modulo, result is 0
-            //    mov #0, %rdx
-            //    j $done
-            // ;;   for signed div, check for integer overflow against INT_MIN of the right size
-            // cmp INT_MIN, %rax
-            // jnz $do_op
-            // ud2
+            //
+            // ;; for srem, result is 0
+            // mov #0, %dst
+            // j $done
            //
            // $do_op:
-            // ;; if signed
-            //     cdq ;; sign-extend from rax into rdx
-            // ;; else
-            //     mov #0, %rdx
            // idiv %divisor
            //
            // $done:

-            // Check if the divisor is zero, first.
-            let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor);
+            let do_op = sink.get_label();
+            let done_label = sink.get_label();
+
+            // Check if the divisor is -1, and if it isn't then immediately
+            // go to the `idiv`.
+            let inst = Inst::cmp_rmi_r(size, RegMemImm::imm(0xffffffff), divisor);
+            inst.emit(&[], sink, info, state);
+            one_way_jmp(sink, CC::NZ, do_op);
+
+            // ... otherwise the divisor is -1 and the result is always 0. This
+            // is written to the destination register which will be %rax for
+            // 8-bit srem and %rdx otherwise.
+            //
+            // Note that for 16-to-64-bit srem operations this leaves the
+            // second destination, %rax, unchanged. This isn't semantically
+            // correct if a lowering actually tries to use the `dst_quotient`
+            // output but for srem only the `dst_remainder` output is used for
+            // now.
+            let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(dst));
+            inst.emit(&[], sink, info, state);
+            let inst = Inst::jmp_known(done_label);
            inst.emit(&[], sink, info, state);

+            // Here the `idiv` is executed, which is different depending on the
+            // size
+            sink.bind_label(do_op);
+            let inst = match size {
+                OperandSize::Size8 => Inst::div8(
+                    DivSignedness::Signed,
+                    RegMem::reg(divisor),
+                    Gpr::new(regs::rax()).unwrap(),
+                    Writable::from_reg(Gpr::new(regs::rax()).unwrap()),
+                ),
+                _ => Inst::div(
+                    size,
+                    DivSignedness::Signed,
+                    RegMem::reg(divisor),
+                    Gpr::new(regs::rax()).unwrap(),
+                    Gpr::new(regs::rdx()).unwrap(),
+                    Writable::from_reg(Gpr::new(regs::rax()).unwrap()),
+                    Writable::from_reg(Gpr::new(regs::rdx()).unwrap()),
+                ),
+            };
+            inst.emit(&[], sink, info, state);
+
+            sink.bind_label(done_label);
+        }
+
+        Inst::ValidateSdivDivisor {
+            dividend, divisor, ..
+        }
+        | Inst::ValidateSdivDivisor64 {
+            dividend, divisor, ..
+        } => {
+            let orig_inst = &inst;
+            let divisor = allocs.next(divisor.to_reg());
+            let dividend = allocs.next(dividend.to_reg());
+            let size = match inst {
+                Inst::ValidateSdivDivisor { size, .. } => *size,
+                _ => OperandSize::Size64,
+            };
+
+            // First trap if the divisor is zero
+            let inst = Inst::cmp_rmi_r(size, RegMemImm::imm(0), divisor);
+            inst.emit(&[], sink, info, state);
            let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero);
            inst.emit(&[], sink, info, state);

-            let (do_op, done_label) = if kind.is_signed() {
-                // Now check if the divisor is -1.
-                let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor);
-                inst.emit(&[], sink, info, state);
-                let do_op = sink.get_label();
-
-                // If not equal, jump to do-op.
-                one_way_jmp(sink, CC::NZ, do_op);
-
-                // Here, divisor == -1.
-                if !kind.is_div() {
-                    // x % -1 = 0; put the result into the destination, $rax.
-                    let done_label = sink.get_label();
-
-                    let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rax()));
+            // Now check if the divisor is -1. If it isn't, the division is
+            // safe, so jump to the end. Otherwise additionally check whether
+            // the dividend is INT_MIN, and trap if it is.
+            let inst = Inst::cmp_rmi_r(size, RegMemImm::imm(0xffffffff), divisor);
+            inst.emit(&[], sink, info, state);
+            let done = sink.get_label();
+            one_way_jmp(sink, CC::NZ, done);
+            let int_min = match orig_inst {
+                Inst::ValidateSdivDivisor64 { tmp, .. } => {
+                    let tmp = allocs.next(tmp.to_reg().to_reg());
+                    let inst = Inst::imm(size, i64::MIN as u64, Writable::from_reg(tmp));
                    inst.emit(&[], sink, info, state);
-
-                    let inst = Inst::jmp_known(done_label);
-                    inst.emit(&[], sink, info, state);
-
-                    (Some(do_op), Some(done_label))
-                } else {
-                    // Check for integer overflow.
-                    if *size == OperandSize::Size64 {
-                        let tmp = tmp.expect("temporary for i64 sdiv");
-
-                        let inst = Inst::imm(
-                            OperandSize::Size64,
-                            0x8000000000000000,
-                            Writable::from_reg(tmp),
-                        );
-                        inst.emit(&[], sink, info, state);
-
-                        let inst =
-                            Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(tmp), regs::rax());
-                        inst.emit(&[], sink, info, state);
-                    } else {
-                        let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax());
-                        inst.emit(&[], sink, info, state);
-                    }
-
-                    // If not equal, jump over the trap.
-                    let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow);
-                    inst.emit(&[], sink, info, state);
-
-                    (Some(do_op), None)
+                    RegMemImm::reg(tmp)
                }
-            } else {
-                (None, None)
+                _ => RegMemImm::imm(match size {
+                    OperandSize::Size8 => 0x80,
+                    OperandSize::Size16 => 0x8000,
+                    OperandSize::Size32 => 0x80000000,
+                    OperandSize::Size64 => unreachable!(),
+                }),
            };
-
-            if let Some(do_op) = do_op {
-                sink.bind_label(do_op);
-            }
-
-            let dividend_lo = Gpr::new(regs::rax()).unwrap();
-            let dst_quotient = WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap());
-            let (dividend_hi, dst_remainder) = if *size == OperandSize::Size8 {
-                (
-                    Gpr::new(regs::rax()).unwrap(),
-                    Writable::from_reg(Gpr::new(regs::rax()).unwrap()),
-                )
-            } else {
-                (
-                    Gpr::new(regs::rdx()).unwrap(),
-                    Writable::from_reg(Gpr::new(regs::rdx()).unwrap()),
-                )
-            };
-
-            // Fill in the high parts:
-            if kind.is_signed() {
-                // sign-extend the sign-bit of rax into rdx, for signed opcodes.
-                let inst =
-                    Inst::sign_extend_data(*size, dividend_lo, WritableGpr::from_reg(dividend_hi));
-                inst.emit(&[], sink, info, state);
-            } else if *size != OperandSize::Size8 {
-                // zero for unsigned opcodes.
-                let inst = Inst::imm(
-                    OperandSize::Size64,
-                    0,
-                    Writable::from_reg(dividend_hi.to_reg()),
-                );
-                inst.emit(&[], sink, info, state);
-            }
-
-            let inst = Inst::div(
-                *size,
-                kind.is_signed(),
-                RegMem::reg(divisor),
-                dividend_lo,
-                dividend_hi,
-                dst_quotient,
-                dst_remainder,
-            );
+            let inst = Inst::cmp_rmi_r(size, int_min, dividend);
+            inst.emit(&[], sink, info, state);
+            let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow);
            inst.emit(&[], sink, info, state);

-            // Lowering takes care of moving the result back into the right register, see comment
-            // there.
-
-            if let Some(done) = done_label {
-                sink.bind_label(done);
-            }
+            sink.bind_label(done);
        }

        Inst::Imm {
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -1749,7 +1749,7 @@ fn test_x64_emit() {
    insns.push((
        Inst::div(
            OperandSize::Size32,
-            true, /*signed*/
+            DivSignedness::Signed,
            RegMem::reg(regs::rsi()),
            Gpr::new(regs::rax()).unwrap(),
            Gpr::new(regs::rdx()).unwrap(),
@@ -1762,7 +1762,7 @@ fn test_x64_emit() {
    insns.push((
        Inst::div(
            OperandSize::Size64,
-            true, /*signed*/
+            DivSignedness::Signed,
            RegMem::reg(regs::r15()),
            Gpr::new(regs::rax()).unwrap(),
            Gpr::new(regs::rdx()).unwrap(),
@@ -1775,7 +1775,7 @@ fn test_x64_emit() {
    insns.push((
        Inst::div(
            OperandSize::Size32,
-            false, /*signed*/
+            DivSignedness::Unsigned,
            RegMem::reg(regs::r14()),
            Gpr::new(regs::rax()).unwrap(),
            Gpr::new(regs::rdx()).unwrap(),
@@ -1788,7 +1788,7 @@ fn test_x64_emit() {
    insns.push((
        Inst::div(
            OperandSize::Size64,
-            false, /*signed*/
+            DivSignedness::Unsigned,
            RegMem::reg(regs::rdi()),
            Gpr::new(regs::rax()).unwrap(),
            Gpr::new(regs::rdx()).unwrap(),
@@ -1799,30 +1799,24 @@ fn test_x64_emit() {
        "div     %rax, %rdx, %rdi, %rax, %rdx",
    ));
    insns.push((
-        Inst::div(
-            OperandSize::Size8,
-            false,
+        Inst::div8(
+            DivSignedness::Unsigned,
            RegMem::reg(regs::rax()),
            Gpr::new(regs::rax()).unwrap(),
-            Gpr::new(regs::rdx()).unwrap(),
            WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
-            WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
        ),
        "F6F0",
-        "div     %al, (none), %al, %al, (none)",
+        "div     %al, %al, %al",
    ));
    insns.push((
-        Inst::div(
-            OperandSize::Size8,
-            false,
+        Inst::div8(
+            DivSignedness::Unsigned,
            RegMem::reg(regs::rsi()),
            Gpr::new(regs::rax()).unwrap(),
-            Gpr::new(regs::rdx()).unwrap(),
            WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
-            WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
        ),
        "40F6F6",
-        "div     %al, (none), %sil, %al, (none)",
+        "div     %al, %sil, %al",
    ));

    // ========================================================
@@ -1864,48 +1858,6 @@ fn test_x64_emit() {
        "mul     %rax, %rdi, %rax, %rdx",
    ));

-    // ========================================================
-    // cbw
-    insns.push((
-        Inst::sign_extend_data(
-            OperandSize::Size8,
-            Gpr::new(regs::rax()).unwrap(),
-            WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
-        ),
-        "6698",
-        "cbw %al, %al",
-    ));
-
-    // ========================================================
-    // cdq family: SignExtendRaxRdx
-    insns.push((
-        Inst::sign_extend_data(
-            OperandSize::Size16,
-            Gpr::new(regs::rax()).unwrap(),
-            WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
-        ),
-        "6699",
-        "cwd %ax, %dx",
-    ));
-    insns.push((
-        Inst::sign_extend_data(
-            OperandSize::Size32,
-            Gpr::new(regs::rax()).unwrap(),
-            WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
-        ),
-        "99",
-        "cdq %eax, %edx",
-    ));
-    insns.push((
-        Inst::sign_extend_data(
-            OperandSize::Size64,
-            Gpr::new(regs::rax()).unwrap(),
-            WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
-        ),
-        "4899",
-        "cqo %rax, %rdx",
-    ));
-
    // ========================================================
    // Imm_R
    //
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -71,13 +71,17 @@ impl Inst {
            | Inst::Bswap { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
-            | Inst::CheckedDivOrRemSeq { .. }
+            | Inst::CheckedSRemSeq { .. }
+            | Inst::CheckedSRemSeq8 { .. }
+            | Inst::ValidateSdivDivisor { .. }
+            | Inst::ValidateSdivDivisor64 { .. }
            | Inst::Cmove { .. }
            | Inst::CmpRmiR { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::Div { .. }
+            | Inst::Div8 { .. }
            | Inst::Fence { .. }
            | Inst::Hlt
            | Inst::Imm { .. }
@@ -220,7 +224,7 @@ impl Inst {

    pub(crate) fn div(
        size: OperandSize,
-        signed: bool,
+        sign: DivSignedness,
        divisor: RegMem,
        dividend_lo: Gpr,
        dividend_hi: Gpr,
@@ -230,7 +234,7 @@ impl Inst {
        divisor.assert_regclass_is(RegClass::Int);
        Inst::Div {
            size,
-            signed,
+            sign,
            divisor: GprMem::new(divisor).unwrap(),
            dividend_lo,
            dividend_hi,
@@ -239,36 +243,21 @@ impl Inst {
        }
    }

-    pub(crate) fn checked_div_or_rem_seq(
-        kind: DivOrRemKind,
-        size: OperandSize,
-        divisor: Reg,
-        dividend_lo: Gpr,
-        dividend_hi: Gpr,
-        dst_quotient: WritableGpr,
-        dst_remainder: WritableGpr,
-        tmp: Option<Writable<Reg>>,
+    pub(crate) fn div8(
+        sign: DivSignedness,
+        divisor: RegMem,
+        dividend: Gpr,
+        dst: WritableGpr,
    ) -> Inst {
-        debug_assert!(divisor.class() == RegClass::Int);
-        debug_assert!(tmp
-            .map(|tmp| tmp.to_reg().class() == RegClass::Int)
-            .unwrap_or(true));
-        Inst::CheckedDivOrRemSeq {
-            kind,
-            size,
-            divisor: Gpr::new(divisor).unwrap(),
-            dividend_lo,
-            dividend_hi,
-            dst_quotient,
-            dst_remainder,
-            tmp: tmp.map(|tmp| WritableGpr::from_writable_reg(tmp).unwrap()),
+        divisor.assert_regclass_is(RegClass::Int);
+        Inst::Div8 {
+            sign,
+            divisor: GprMem::new(divisor).unwrap(),
+            dividend,
+            dst,
        }
    }

-    pub(crate) fn sign_extend_data(size: OperandSize, src: Gpr, dst: WritableGpr) -> Inst {
-        Inst::SignExtendData { size, src, dst }
-    }
-
    pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
@@ -780,33 +769,25 @@ impl PrettyPrint for Inst {

            Inst::Div {
                size,
-                signed,
+                sign,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
+                let divisor = divisor.pretty_print(size.to_bytes(), allocs);
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs);
+                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs);
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs);
-                let dst_remainder = if size.to_bits() > 8 {
-                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs)
-                } else {
-                    "(none)".to_string()
-                };
-                let dividend_hi = if size.to_bits() > 8 {
-                    pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs)
-                } else {
-                    "(none)".to_string()
-                };
-                let divisor = divisor.pretty_print(size.to_bytes(), allocs);
+                let dst_remainder =
+                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs);
                format!(
                    "{} {}, {}, {}, {}, {}",
-                    ljustify(if *signed {
-                        "idiv".to_string()
-                    } else {
-                        "div".into()
+                    ljustify(match sign {
+                        DivSignedness::Signed => "idiv".to_string(),
+                        DivSignedness::Unsigned => "div".to_string(),
                    }),
                    dividend_lo,
                    dividend_hi,
@@ -816,6 +797,24 @@ impl PrettyPrint for Inst {
                )
            }

+            Inst::Div8 {
+                sign,
+                divisor,
+                dividend,
+                dst,
+            } => {
+                let divisor = divisor.pretty_print(1, allocs);
+                let dividend = pretty_print_reg(dividend.to_reg(), 1, allocs);
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1, allocs);
+                format!(
+                    "{} {dividend}, {divisor}, {dst}",
+                    ljustify(match sign {
+                        DivSignedness::Signed => "idiv".to_string(),
+                        DivSignedness::Unsigned => "div".to_string(),
+                    }),
+                )
+            }
+
            Inst::MulHi {
                size,
                signed,
@@ -842,43 +841,59 @@ impl PrettyPrint for Inst {
                )
            }

-            Inst::CheckedDivOrRemSeq {
-                kind,
+            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
-                tmp,
            } => {
+                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs);
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs);
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs);
-                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs);
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs);
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs);
-                let tmp = tmp
-                    .map(|tmp| pretty_print_reg(tmp.to_reg().to_reg(), size.to_bytes(), allocs))
-                    .unwrap_or("(none)".to_string());
                format!(
-                    "{} {}, {}, {}, {}, {}, tmp={}",
-                    match kind {
-                        DivOrRemKind::SignedDiv => "sdiv_seq",
-                        DivOrRemKind::UnsignedDiv => "udiv_seq",
-                        DivOrRemKind::SignedRem => "srem_seq",
-                        DivOrRemKind::UnsignedRem => "urem_seq",
-                    },
-                    dividend_lo,
-                    dividend_hi,
-                    divisor,
-                    dst_quotient,
-                    dst_remainder,
-                    tmp,
+                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
+                        {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

+            Inst::CheckedSRemSeq8 {
+                divisor,
+                dividend,
+                dst,
+            } => {
+                let divisor = pretty_print_reg(divisor.to_reg(), 1, allocs);
+                let dividend = pretty_print_reg(dividend.to_reg(), 1, allocs);
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1, allocs);
+                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
+            }
+
+            Inst::ValidateSdivDivisor {
+                dividend,
+                divisor,
+                size,
+            } => {
+                let dividend = pretty_print_reg(dividend.to_reg(), size.to_bytes(), allocs);
+                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs);
+                format!("validate_sdiv_divisor {dividend}, {divisor}")
+            }
+
+            Inst::ValidateSdivDivisor64 {
+                dividend,
+                divisor,
+                tmp,
+            } => {
+                let dividend = pretty_print_reg(dividend.to_reg(), 8, allocs);
+                let divisor = pretty_print_reg(divisor.to_reg(), 8, allocs);
+                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8, allocs);
+                format!("validate_sdiv_divisor {dividend}, {divisor} {tmp}")
+            }
+
            Inst::SignExtendData { size, src, dst } => {
                let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
@@ -1857,21 +1872,37 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
            collector.reg_reuse_def(dst.to_writable_reg(), 0);
        }
        Inst::Div {
-            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
-            size,
+            ..
+        }
+        | Inst::CheckedSRemSeq {
+            dividend_lo,
+            dividend_hi,
+            dst_quotient,
+            dst_remainder,
            ..
        } => {
-            collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax());
-            collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax());
-            if size.to_bits() > 8 {
-                collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx());
-                collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx());
+            match inst {
+                Inst::Div { divisor, .. } => divisor.get_operands(collector),
+                Inst::CheckedSRemSeq { divisor, .. } => collector.reg_use(divisor.to_reg()),
+                _ => {}
            }
-            divisor.get_operands(collector);
+            collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax());
+            collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx());
+            collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax());
+            collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx());
+        }
+        Inst::Div8 { dividend, dst, .. } | Inst::CheckedSRemSeq8 { dividend, dst, .. } => {
+            match inst {
+                Inst::Div8 { divisor, .. } => divisor.get_operands(collector),
+                Inst::CheckedSRemSeq8 { divisor, .. } => collector.reg_use(divisor.to_reg()),
+                _ => {}
+            }
+            collector.reg_fixed_use(dividend.to_reg(), regs::rax());
+            collector.reg_fixed_def(dst.to_writable_reg(), regs::rax());
        }
        Inst::MulHi {
            src1,
@@ -1885,25 +1916,20 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
            collector.reg_fixed_def(dst_hi.to_writable_reg(), regs::rdx());
            src2.get_operands(collector);
        }
-        Inst::CheckedDivOrRemSeq {
-            divisor,
-            dividend_lo,
-            dividend_hi,
-            dst_quotient,
-            dst_remainder,
-            tmp,
-            ..
+        Inst::ValidateSdivDivisor {
+            dividend, divisor, ..
        } => {
-            collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax());
-            collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx());
            collector.reg_use(divisor.to_reg());
-            collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax());
-            collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx());
-            if let Some(tmp) = tmp {
-                // Early def so that the temporary register does not
-                // conflict with inputs or outputs.
-                collector.reg_early_def(tmp.to_writable_reg());
-            }
+            collector.reg_use(dividend.to_reg());
+        }
+        Inst::ValidateSdivDivisor64 {
+            dividend,
+            divisor,
+            tmp,
+        } => {
+            collector.reg_use(divisor.to_reg());
+            collector.reg_use(dividend.to_reg());
+            collector.reg_early_def(tmp.to_writable_reg());
        }
        Inst::SignExtendData { size, src, dst } => {
            match size {
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -3491,23 +3491,154 @@

 ;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (udiv a @ (value_type ty) b))
-      (div_or_rem (DivOrRemKind.UnsignedDiv) a b))
+;; The inputs to the `div` instruction are different for 8-bit division so
+;; it needs a special case here since the instruction being crafted has a
+;; different shape.
+(rule 2 (lower (udiv a @ (value_type $I8) b))
+        (x64_div8 (extend_to_gpr a $I32 (ExtendKind.Zero))
+                  (nonzero_divisor $I8 b)
+                  (DivSignedness.Unsigned)))
+
+;; 16-to-64-bit division is all done with a similar instruction and the only
+;; tricky requirement here is that when div traps are disallowed the divisor
+;; must not be zero.
+(rule 1 (lower (udiv a @ (value_type (fits_in_64 ty)) b))
+        (x64_div_quotient a
+                          (imm $I64 0)
+                          (nonzero_divisor ty b)
+                          (raw_operand_size_of_type ty)
+                          (DivSignedness.Unsigned)))
+
+;; Helper to place `Value` into a `Gpr` while possibly trapping if it's zero.
+;;
+;; If the `avoid_div_traps=true` codegen setting is specified then the value
+;; is checked for zero and a trap happens before the value is returned as a
+;; register here.
+(decl nonzero_divisor (Type Value) Gpr)
+
+;; As a special case, if the divisor is a nonzero constant then no checks are
+;; necessary regardless of the `avoid_div_traps` setting.
+(rule 2 (nonzero_divisor ty (iconst (u64_from_imm64 (u64_nonzero n))))
+        (imm ty n))
+
+;; No checks necessary when `avoid_div_traps=false`
+(rule 1 (nonzero_divisor ty val)
+        (if-let $false (avoid_div_traps))
+        val)
+
+;; Base case traps if `val` is zero by using a `test` + `trap_if` combo
+(rule (nonzero_divisor ty val)
+      (let (
+          (val Reg val)
+          (_ InstOutput (side_effect (with_flags_side_effect
+            (x64_test (raw_operand_size_of_type ty) val val)
+            (trap_if (CC.Z) (TrapCode.IntegerDivisionByZero)))))
+        )
+        val))

 ;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (sdiv a @ (value_type ty) b))
-      (div_or_rem (DivOrRemKind.SignedDiv) a b))
+(rule 2 (lower (sdiv a @ (value_type $I8) b))
+        (let (
+            (a Gpr (x64_sign_extend_data a (OperandSize.Size8)))
+          )
+          (x64_div8 a (safe_sdiv_divisor $I8 a b) (DivSignedness.Signed))))
+
+(rule 1 (lower (sdiv a @ (value_type (fits_in_64 ty)) b))
+        (let (
+            (a Gpr a)
+            (size OperandSize (raw_operand_size_of_type ty))
+            (b Gpr (safe_sdiv_divisor ty a b))
+          )
+          (x64_div_quotient a (x64_sign_extend_data a size) b size (DivSignedness.Signed))))
+
+;; Similar to `nonzero_divisor` except this checks to make sure that the divisor
+;; provided as a `Value` is safe to divide into the dividend `Gpr` provided.
+(decl safe_sdiv_divisor (Type Gpr Value) Reg)
+
+;; If the divisor is a constant that isn't 0 or -1, then it's always safe so
+;; materialize it into a register.
+(rule 3 (safe_sdiv_divisor ty a (iconst imm))
+        (if-let n (safe_divisor_from_imm64 ty imm))
+        (imm ty n))
+
+;; With `avoid_div_traps=false` the divisor can be plumbed through.
+;;
+;; Note that CLIF semantics dictate that division-by-zero and INT_MIN/-1 both
+;; trap, but this matches the hardware semantics of `idiv` on x64 so they're
+;; fine to get plumbed through as-is.
+(rule 2 (safe_sdiv_divisor ty a b)
+        (if-let $false (avoid_div_traps))
+        b)
+
+;; The base cases here rely on pseudo-instructions to perform the checks,
+;; since those checks involve jumping around with labels.
+(rule 1 (safe_sdiv_divisor $I64 a b) (validate_sdiv_divisor64 a b))
+(rule 0 (safe_sdiv_divisor ty a b) (validate_sdiv_divisor (raw_operand_size_of_type ty) a b))

 ;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (urem a @ (value_type ty) b))
-      (div_or_rem (DivOrRemKind.UnsignedRem) a b))
+;; The remainder is in AH, so take the result of the division and right-shift
+;; by 8.
+(rule 2 (lower (urem a @ (value_type $I8) b))
+        (let (
+            (a Gpr (extend_to_gpr a $I32 (ExtendKind.Zero)))
+            (b Gpr (nonzero_divisor $I8 b))
+            (result Gpr (x64_div8 a b (DivSignedness.Unsigned)))
+          )
+          (x64_shr $I64 result (Imm8Reg.Imm8 8))))
+
+(rule 1 (lower (urem a @ (value_type (fits_in_64 ty)) b))
+        (x64_div_remainder a
+                           (imm $I64 0)
+                           (nonzero_divisor ty b)
+                           (raw_operand_size_of_type ty)
+                           (DivSignedness.Unsigned)))

 ;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

+;; Special-cases first for constant `srem` where the checks for 0 and -1 aren't
+;; applicable.
+;;
+;; Note that, like `urem`, for i8 types the remainder is in AH, so the result
+;; of the division is right-shifted by 8 to extract it.
+(rule 3 (lower (srem a @ (value_type $I8) (iconst imm)))
+        (if-let n (safe_divisor_from_imm64 $I8 imm))
+        (let (
+            (a Gpr (x64_sign_extend_data a (OperandSize.Size8)))
+            (result Gpr (x64_div8 a (imm $I8 n) (DivSignedness.Signed)))
+          )
+          (x64_shr $I64 result (Imm8Reg.Imm8 8))))
+
+;; Same as the above rule but for 16-to-64 bit types.
+(rule 2 (lower (srem a @ (value_type ty) (iconst imm)))
+        (if-let n (safe_divisor_from_imm64 ty imm))
+        (let (
+            (a Gpr a)
+            (size OperandSize (raw_operand_size_of_type ty))
+          )
+          (x64_div_remainder a
+                             (x64_sign_extend_data a size)
+                             (imm ty n)
+                             size
+                             (DivSignedness.Signed))))
+
+(rule 1 (lower (srem a @ (value_type $I8) b))
+        (let (
+            (a Gpr (x64_sign_extend_data a (OperandSize.Size8)))
+            (b Gpr (nonzero_divisor $I8 b))
+          )
+          (x64_shr $I64 (x64_checked_srem_seq8 a b) (Imm8Reg.Imm8 8))))
+
 (rule (lower (srem a @ (value_type ty) b))
-      (div_or_rem (DivOrRemKind.SignedRem) a b))
+      (let (
+          (a Gpr a)
+          (b Gpr (nonzero_divisor ty b))
+          (size OperandSize (raw_operand_size_of_type ty))
+          (hi Gpr (x64_sign_extend_data a size))
+          (tmp ValueRegs (x64_checked_srem_seq size a hi b))
+        )
+        (value_regs_get tmp 1)))

 ;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -848,138 +848,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
    }

-    fn emit_div_or_rem(
-        &mut self,
-        kind: &DivOrRemKind,
-        ty: Type,
-        dst: WritableGpr,
-        dividend: Gpr,
-        divisor: Gpr,
-    ) {
-        let is_div = kind.is_div();
-        let size = OperandSize::from_ty(ty);
-
-        let dst_quotient = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
-        let dst_remainder = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
-
-        // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
-        if self.backend.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem {
-            // A vcode meta-instruction is used to lower the inline checks, since they embed
-            // pc-relative offsets that must not change, thus requiring regalloc to not
-            // interfere by introducing spills and reloads.
-            let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 {
-                Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap())
-            } else {
-                None
-            };
-            let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
-            self.lower_ctx.emit(MInst::AluConstOp {
-                op: AluRmiROpcode::Xor,
-                size: OperandSize::Size32,
-                dst: WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()),
-            });
-            self.lower_ctx.emit(MInst::checked_div_or_rem_seq(
-                kind.clone(),
-                size,
-                divisor.to_reg(),
-                Gpr::new(dividend.to_reg()).unwrap(),
-                Gpr::new(dividend_hi.to_reg()).unwrap(),
-                WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()),
-                WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()),
-                tmp,
-            ));
-        } else {
-            // We don't want more than one trap record for a single instruction,
-            // so let's not allow the "mem" case (load-op merging) here; force
-            // divisor into a register instead.
-            let divisor = RegMem::reg(divisor.to_reg());
-
-            let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
-
-            // Fill in the high parts:
-            let dividend_lo = if kind.is_signed() && ty == types::I8 {
-                let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                // 8-bit div takes its dividend in only the `lo` reg.
-                self.lower_ctx.emit(MInst::sign_extend_data(
-                    size,
-                    Gpr::new(dividend.to_reg()).unwrap(),
-                    WritableGpr::from_reg(Gpr::new(dividend_lo.to_reg()).unwrap()),
-                ));
-                // `dividend_hi` is not used by the Div below, so we
-                // don't def it here.
-
-                dividend_lo.to_reg()
-            } else if kind.is_signed() {
-                // 16-bit and higher div takes its operand in hi:lo
-                // with half in each (64:64, 32:32 or 16:16).
-                self.lower_ctx.emit(MInst::sign_extend_data(
-                    size,
-                    Gpr::new(dividend.to_reg()).unwrap(),
-                    WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()),
-                ));
-
-                dividend.to_reg()
-            } else if ty == types::I8 {
-                let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                self.lower_ctx.emit(MInst::movzx_rm_r(
-                    ExtMode::BL,
-                    RegMem::reg(dividend.to_reg()),
-                    dividend_lo,
-                ));
-
-                dividend_lo.to_reg()
-            } else {
-                // zero for unsigned opcodes.
-                self.lower_ctx
-                    .emit(MInst::imm(OperandSize::Size64, 0, dividend_hi));
-
-                dividend.to_reg()
-            };
-
-            // Emit the actual idiv.
-            self.lower_ctx.emit(MInst::div(
-                size,
-                kind.is_signed(),
-                divisor,
-                Gpr::new(dividend_lo).unwrap(),
-                Gpr::new(dividend_hi.to_reg()).unwrap(),
-                WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()),
-                WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()),
-            ));
-        }
-
-        // Move the result back into the destination reg.
-        if is_div {
-            // The quotient is in rax.
-            self.lower_ctx.emit(MInst::gen_move(
-                dst.to_writable_reg(),
-                dst_quotient.to_reg(),
-                ty,
-            ));
-        } else {
-            if size == OperandSize::Size8 {
-                let tmp = self.temp_writable_reg(ty);
-                // The remainder is in AH. Right-shift by 8 bits then move from rax.
-                self.lower_ctx.emit(MInst::shift_r(
-                    OperandSize::Size64,
-                    ShiftKind::ShiftRightLogical,
-                    Imm8Gpr::new(Imm8Reg::Imm8 { imm: 8 }).unwrap(),
-                    dst_quotient.to_reg(),
-                    tmp,
-                ));
-                self.lower_ctx
-                    .emit(MInst::gen_move(dst.to_writable_reg(), tmp.to_reg(), ty));
-            } else {
-                // The remainder is in rdx.
-                self.lower_ctx.emit(MInst::gen_move(
-                    dst.to_writable_reg(),
-                    dst_remainder.to_reg(),
-                    ty,
-                ));
-            }
-        }
-    }
-
    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -285,12 +285,8 @@ macro_rules! isle_lower_prelude_methods {
            }
        }

-        fn avoid_div_traps(&mut self, _: Type) -> Option<()> {
-            if self.backend.flags().avoid_div_traps() {
-                Some(())
-            } else {
-                None
-            }
+        fn avoid_div_traps(&mut self) -> bool {
+            self.backend.flags().avoid_div_traps()
        }

        #[inline]
@@ -637,6 +633,20 @@ macro_rules! isle_lower_prelude_methods {
                shuffle_imm_as_le_lane_idx(2, &bytes[14..16])?,
            ))
        }
+
+        fn safe_divisor_from_imm64(&mut self, ty: Type, val: Imm64) -> Option<u64> {
+            let minus_one = if ty.bytes() == 8 {
+                -1
+            } else {
+                (1 << (ty.bytes() * 8)) - 1
+            };
+            let bits = val.bits() & minus_one;
+            if bits == 0 || bits == minus_one {
+                None
+            } else {
+                Some(bits as u64)
+            }
+        }
    };
 }

--- a/cranelift/codegen/src/prelude_lower.isle
+++ b/cranelift/codegen/src/prelude_lower.isle
@@ -530,8 +530,8 @@

 ;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(decl avoid_div_traps () Type)
-(extern extractor avoid_div_traps avoid_div_traps)
+(decl pure avoid_div_traps () bool)
+(extern constructor avoid_div_traps avoid_div_traps)

 ;; This definition should be kept up to date with the values defined in
 ;; cranelift/codegen/meta/src/shared/settings.rs
@@ -722,6 +722,10 @@
 (decl gen_return (ValueSlice) Unit)
 (extern constructor gen_return gen_return)

+;; Helper for extracting an immediate that's not 0 and not -1 from an imm64.
+(decl pure partial safe_divisor_from_imm64 (Type Imm64) u64)
+(extern constructor safe_divisor_from_imm64 safe_divisor_from_imm64)
+
 ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (convert Inst Value def_inst)
--- a/cranelift/filetests/filetests/isa/x64/div-checks.clif
+++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif
@@ -19,8 +19,8 @@ block0(v0: i8, v1: i8):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   xorl    %edx, %edx, %edx
-;   srem_seq %al, %dl, %sil, %al, %dl, tmp=(none)
+;   cbw %al, %al
+;   checked_srem_seq %al, %sil, %al
 ;   shrq    $8, %rax, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -32,15 +32,11 @@ block0(v0: i8, v1: i8):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   xorl %edx, %edx
-;   cmpb $0, %sil
-;   jne 0x15
-;   ud2 ; trap: int_divz
-;   cmpb $0xff, %sil
-;   jne 0x29
-;   movl $0, %eax
-;   jmp 0x2e
 ;   cbtw
+;   cmpb $0xff, %sil
+;   jne 0x1d
+;   movl $0, %eax
+;   jmp 0x20
 ;   idivb %sil ; trap: int_divz
 ;   shrq $8, %rax
 ;   movq %rbp, %rsp
@@ -59,8 +55,8 @@ block0(v0: i16, v1: i16):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   xorl    %edx, %edx, %edx
-;   srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none)
+;   cwd %ax, %dx
+;   checked_srem_seq %ax, %dx, %si, %ax, %dx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -72,15 +68,11 @@ block0(v0: i16, v1: i16):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   xorl %edx, %edx
-;   cmpw $0, %si
-;   jne 0x15
-;   ud2 ; trap: int_divz
-;   cmpw $-1, %si
-;   jne 0x29
-;   movl $0, %eax
-;   jmp 0x2e
 ;   cwtd
+;   cmpw $-1, %si
+;   jne 0x1d
+;   movl $0, %edx
+;   jmp 0x20
 ;   idivw %si ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp
@@ -99,8 +91,8 @@ block0(v0: i32, v1: i32):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   xorl    %edx, %edx, %edx
-;   srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none)
+;   cdq %eax, %edx
+;   checked_srem_seq %eax, %edx, %esi, %eax, %edx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -112,15 +104,11 @@ block0(v0: i32, v1: i32):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   xorl %edx, %edx
-;   cmpl $0, %esi
-;   jne 0x14
-;   ud2 ; trap: int_divz
-;   cmpl $-1, %esi
-;   jne 0x27
-;   movl $0, %eax
-;   jmp 0x2a
 ;   cltd
+;   cmpl $-1, %esi
+;   jne 0x1b
+;   movl $0, %edx
+;   jmp 0x1d
 ;   idivl %esi ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp
@@ -139,8 +127,8 @@ block0(v0: i64, v1: i64):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   xorl    %edx, %edx, %edx
-;   srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none)
+;   cqo %rax, %rdx
+;   checked_srem_seq %rax, %rdx, %rsi, %rax, %rdx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -152,15 +140,11 @@ block0(v0: i64, v1: i64):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   xorl %edx, %edx
-;   cmpq $0, %rsi
-;   jne 0x15
-;   ud2 ; trap: int_divz
-;   cmpq $-1, %rsi
-;   jne 0x29
-;   movl $0, %eax
-;   jmp 0x2e
 ;   cqto
+;   cmpq $-1, %rsi
+;   jne 0x1d
+;   movl $0, %edx
+;   jmp 0x20
 ;   idivq %rsi ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp
--- a/cranelift/filetests/filetests/isa/x64/sdiv-checked.clif
+++ b/cranelift/filetests/filetests/isa/x64/sdiv-checked.clif
@@ -0,0 +1,285 @@
+test compile precise-output
+set avoid_div_traps=true
+target x86_64
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = sdiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cbw %al, %al
+;   validate_sdiv_divisor %sil, %al
+;   idiv    %al, %sil, %al
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cbtw
+;   cmpb $0, %sil
+;   jne 0x15
+;   ud2 ; trap: int_divz
+;   cmpb $0xff, %sil
+;   jne 0x2a
+;   cmpb $0x80, %al
+;   jne 0x2a
+;   ud2 ; trap: int_ovf
+;   idivb %sil ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f2(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = sdiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   validate_sdiv_divisor %si, %di
+;   movq    %rdi, %rax
+;   cwd %ax, %dx
+;   idiv    %ax, %dx, %si, %ax, %dx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   cmpw $0, %si
+;   jne 0x10
+;   ud2 ; trap: int_divz
+;   cmpw $-1, %si
+;   jne 0x27
+;   cmpw $0x8000, %di
+;   jne 0x27
+;   ud2 ; trap: int_ovf
+;   movq %rdi, %rax
+;   cwtd
+;   idivw %si ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f3(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = sdiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   validate_sdiv_divisor %esi, %edi
+;   movq    %rdi, %rax
+;   cdq %eax, %edx
+;   idiv    %eax, %edx, %esi, %eax, %edx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   cmpl $0, %esi
+;   jne 0xf
+;   ud2 ; trap: int_divz
+;   cmpl $-1, %esi
+;   jne 0x26
+;   cmpl $0x80000000, %edi
+;   jne 0x26
+;   ud2 ; trap: int_ovf
+;   movq %rdi, %rax
+;   cltd
+;   idivl %esi ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f4(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = sdiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   validate_sdiv_divisor %rsi, %rdi %rcx
+;   movq    %rdi, %rax
+;   cqo %rax, %rdx
+;   idiv    %rax, %rdx, %rsi, %rax, %rdx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   cmpq $0, %rsi
+;   jne 0x10
+;   ud2 ; trap: int_divz
+;   cmpq $-1, %rsi
+;   jne 0x2f
+;   movabsq $9223372036854775808, %rcx
+;   cmpq %rcx, %rdi
+;   jne 0x2f
+;   ud2 ; trap: int_ovf
+;   movq %rdi, %rax
+;   cqto
+;   idivq %rsi ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i8_imm(i8) -> i8 {
+block0(v0: i8):
+  v1 = iconst.i8 17
+  v2 = sdiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cbw %al, %al
+;   movl    $17, %edx
+;   idiv    %al, %dl, %al
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cbtw
+;   movl $0x11, %edx
+;   idivb %dl ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i16_imm(i16) -> i16 {
+block0(v0: i16):
+  v1 = iconst.i16 17
+  v2 = sdiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    $17, %ecx
+;   movq    %rdi, %rax
+;   cwd %ax, %dx
+;   idiv    %ax, %dx, %cx, %ax, %dx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movl $0x11, %ecx
+;   movq %rdi, %rax
+;   cwtd
+;   idivw %cx ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i32_imm(i32) -> i32 {
+block0(v0: i32):
+  v1 = iconst.i32 17
+  v2 = sdiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    $17, %ecx
+;   movq    %rdi, %rax
+;   cdq %eax, %edx
+;   idiv    %eax, %edx, %ecx, %eax, %edx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movl $0x11, %ecx
+;   movq %rdi, %rax
+;   cltd
+;   idivl %ecx ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_imm(i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 17
+  v2 = sdiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movl    $17, %ecx
+;   movq    %rdi, %rax
+;   cqo %rax, %rdx
+;   idiv    %rax, %rdx, %rcx, %rax, %rdx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movl $0x11, %ecx
+;   movq %rdi, %rax
+;   cqto
+;   idivq %rcx ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
--- a/cranelift/filetests/filetests/isa/x64/sdiv.clif
+++ b/cranelift/filetests/filetests/isa/x64/sdiv.clif
@@ -13,7 +13,7 @@ block0(v0: i8, v1: i8):
 ; block0:
 ;   movq    %rdi, %rax
 ;   cbw %al, %al
-;   idiv    %al, (none), %sil, %al, (none)
+;   idiv    %al, %sil, %al
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
--- a/cranelift/filetests/filetests/isa/x64/srem-checked.clif
+++ b/cranelift/filetests/filetests/isa/x64/srem-checked.clif
@@ -0,0 +1,300 @@
+test compile precise-output
+set avoid_div_traps=true
+target x86_64
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cbw %al, %al
+;   testb   %sil, %sil
+;   jnz ; ud2 int_divz ;
+;   checked_srem_seq %al, %sil, %al
+;   shrq    $8, %rax, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cbtw
+;   testb %sil, %sil
+;   jne 0x14
+;   ud2 ; trap: int_divz
+;   cmpb $0xff, %sil
+;   jne 0x28
+;   movl $0, %eax
+;   jmp 0x2b
+;   idivb %sil ; trap: int_divz
+;   shrq $8, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f2(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   testw   %si, %si
+;   jnz ; ud2 int_divz ;
+;   movq    %rdi, %rax
+;   cwd %ax, %dx
+;   checked_srem_seq %ax, %dx, %si, %ax, %dx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   testw %si, %si
+;   jne 0xf
+;   ud2 ; trap: int_divz
+;   movq %rdi, %rax
+;   cwtd
+;   cmpw $-1, %si
+;   jne 0x28
+;   movl $0, %edx
+;   jmp 0x2b
+;   idivw %si ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f3(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   testl   %esi, %esi
+;   jnz ; ud2 int_divz ;
+;   movq    %rdi, %rax
+;   cdq %eax, %edx
+;   checked_srem_seq %eax, %edx, %esi, %eax, %edx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   testl %esi, %esi
+;   jne 0xe
+;   ud2 ; trap: int_divz
+;   movq %rdi, %rax
+;   cltd
+;   cmpl $-1, %esi
+;   jne 0x25
+;   movl $0, %edx
+;   jmp 0x27
+;   idivl %esi ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f4(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   testq   %rsi, %rsi
+;   jnz ; ud2 int_divz ;
+;   movq    %rdi, %rax
+;   cqo %rax, %rdx
+;   checked_srem_seq %rax, %rdx, %rsi, %rax, %rdx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   testq %rsi, %rsi
+;   jne 0xf
+;   ud2 ; trap: int_divz
+;   movq %rdi, %rax
+;   cqto
+;   cmpq $-1, %rsi
+;   jne 0x28
+;   movl $0, %edx
+;   jmp 0x2b
+;   idivq %rsi ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i8_imm(i8) -> i8 {
+block0(v0: i8):
+  v1 = iconst.i8 17
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cbw %al, %al
+;   movl    $17, %edx
+;   idiv    %al, %dl, %al
+;   shrq    $8, %rax, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cbtw
+;   movl $0x11, %edx
+;   idivb %dl ; trap: int_divz
+;   shrq $8, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i16_imm(i16) -> i16 {
+block0(v0: i16):
+  v1 = iconst.i16 17
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cwd %ax, %dx
+;   movl    $17, %r8d
+;   idiv    %ax, %dx, %r8w, %ax, %dx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cwtd
+;   movl $0x11, %r8d
+;   idivw %r8w ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i32_imm(i32) -> i32 {
+block0(v0: i32):
+  v1 = iconst.i32 17
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cdq %eax, %edx
+;   movl    $17, %r8d
+;   idiv    %eax, %edx, %r8d, %eax, %edx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cltd
+;   movl $0x11, %r8d
+;   idivl %r8d ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_imm(i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 17
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cqo %rax, %rdx
+;   movl    $17, %r8d
+;   idiv    %rax, %rdx, %r8, %rax, %rdx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cqto
+;   movl $0x11, %r8d
+;   idivq %r8 ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
--- a/cranelift/filetests/filetests/isa/x64/srem.clif
+++ b/cranelift/filetests/filetests/isa/x64/srem.clif
@@ -12,8 +12,8 @@ block0(v0: i8, v1: i8):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   xorl    %edx, %edx, %edx
-;   srem_seq %al, %dl, %sil, %al, %dl, tmp=(none)
+;   cbw %al, %al
+;   checked_srem_seq %al, %sil, %al
 ;   shrq    $8, %rax, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -25,15 +25,11 @@ block0(v0: i8, v1: i8):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   xorl %edx, %edx
-;   cmpb $0, %sil
-;   jne 0x15
-;   ud2 ; trap: int_divz
-;   cmpb $0xff, %sil
-;   jne 0x29
-;   movl $0, %eax
-;   jmp 0x2e
 ;   cbtw
+;   cmpb $0xff, %sil
+;   jne 0x1d
+;   movl $0, %eax
+;   jmp 0x20
 ;   idivb %sil ; trap: int_divz
 ;   shrq $8, %rax
 ;   movq %rbp, %rsp
@@ -51,8 +47,8 @@ block0(v0: i16, v1: i16):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   xorl    %edx, %edx, %edx
-;   srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none)
+;   cwd %ax, %dx
+;   checked_srem_seq %ax, %dx, %si, %ax, %dx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -64,15 +60,11 @@ block0(v0: i16, v1: i16):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   xorl %edx, %edx
-;   cmpw $0, %si
-;   jne 0x15
-;   ud2 ; trap: int_divz
-;   cmpw $-1, %si
-;   jne 0x29
-;   movl $0, %eax
-;   jmp 0x2e
 ;   cwtd
+;   cmpw $-1, %si
+;   jne 0x1d
+;   movl $0, %edx
+;   jmp 0x20
 ;   idivw %si ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp
@@ -90,8 +82,8 @@ block0(v0: i32, v1: i32):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   xorl    %edx, %edx, %edx
-;   srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none)
+;   cdq %eax, %edx
+;   checked_srem_seq %eax, %edx, %esi, %eax, %edx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -103,15 +95,11 @@ block0(v0: i32, v1: i32):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   xorl %edx, %edx
-;   cmpl $0, %esi
-;   jne 0x14
-;   ud2 ; trap: int_divz
-;   cmpl $-1, %esi
-;   jne 0x27
-;   movl $0, %eax
-;   jmp 0x2a
 ;   cltd
+;   cmpl $-1, %esi
+;   jne 0x1b
+;   movl $0, %edx
+;   jmp 0x1d
 ;   idivl %esi ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp
@@ -129,8 +117,8 @@ block0(v0: i64, v1: i64):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   xorl    %edx, %edx, %edx
-;   srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none)
+;   cqo %rax, %rdx
+;   checked_srem_seq %rax, %rdx, %rsi, %rax, %rdx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -142,18 +130,150 @@ block0(v0: i64, v1: i64):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   xorl %edx, %edx
-;   cmpq $0, %rsi
-;   jne 0x15
-;   ud2 ; trap: int_divz
-;   cmpq $-1, %rsi
-;   jne 0x29
-;   movl $0, %eax
-;   jmp 0x2e
 ;   cqto
+;   cmpq $-1, %rsi
+;   jne 0x1d
+;   movl $0, %edx
+;   jmp 0x20
 ;   idivq %rsi ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq

+function %i8_imm(i8) -> i8 {
+block0(v0: i8):
+  v1 = iconst.i8 17
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cbw %al, %al
+;   movl    $17, %edx
+;   idiv    %al, %dl, %al
+;   shrq    $8, %rax, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cbtw
+;   movl $0x11, %edx
+;   idivb %dl ; trap: int_divz
+;   shrq $8, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i16_imm(i16) -> i16 {
+block0(v0: i16):
+  v1 = iconst.i16 17
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cwd %ax, %dx
+;   movl    $17, %r8d
+;   idiv    %ax, %dx, %r8w, %ax, %dx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cwtd
+;   movl $0x11, %r8d
+;   idivw %r8w ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i32_imm(i32) -> i32 {
+block0(v0: i32):
+  v1 = iconst.i32 17
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cdq %eax, %edx
+;   movl    $17, %r8d
+;   idiv    %eax, %edx, %r8d, %eax, %edx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cltd
+;   movl $0x11, %r8d
+;   idivl %r8d ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_imm(i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 17
+  v2 = srem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cqo %rax, %rdx
+;   movl    $17, %r8d
+;   idiv    %rax, %rdx, %r8, %rax, %rdx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   cqto
+;   movl $0x11, %r8d
+;   idivq %r8 ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
--- a/cranelift/filetests/filetests/isa/x64/udiv-checked.clif
+++ b/cranelift/filetests/filetests/isa/x64/udiv-checked.clif
@@ -0,0 +1,264 @@
+test compile precise-output
+set avoid_div_traps=true
+target x86_64
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = udiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movzbl  %dil, %eax
+;   testb   %sil, %sil
+;   jnz ; ud2 int_divz ;
+;   div     %al, %sil, %al
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movzbl %dil, %eax
+;   testb %sil, %sil
+;   jne 0x13
+;   ud2 ; trap: int_divz
+;   divb %sil ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f2(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = udiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   testw   %si, %si
+;   jnz ; ud2 int_divz ;
+;   div     %ax, %dx, %si, %ax, %dx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   testw %si, %si
+;   jne 0x15
+;   ud2 ; trap: int_divz
+;   divw %si ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f3(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = udiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   testl   %esi, %esi
+;   jnz ; ud2 int_divz ;
+;   div     %eax, %edx, %esi, %eax, %edx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   testl %esi, %esi
+;   jne 0x14
+;   ud2 ; trap: int_divz
+;   divl %esi ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f4(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = udiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   testq   %rsi, %rsi
+;   jnz ; ud2 int_divz ;
+;   div     %rax, %rdx, %rsi, %rax, %rdx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   testq %rsi, %rsi
+;   jne 0x15
+;   ud2 ; trap: int_divz
+;   divq %rsi ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i8_imm(i8) -> i8 {
+block0(v0: i8):
+  v1 = iconst.i8 17
+  v2 = udiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movzbl  %dil, %eax
+;   movl    $17, %edx
+;   div     %al, %dl, %al
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movzbl %dil, %eax
+;   movl $0x11, %edx
+;   divb %dl ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i16_imm(i16) -> i16 {
+block0(v0: i16):
+  v1 = iconst.i16 17
+  v2 = udiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   movl    $17, %r8d
+;   div     %ax, %dx, %r8w, %ax, %dx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   movl $0x11, %r8d
+;   divw %r8w ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i32_imm(i32) -> i32 {
+block0(v0: i32):
+  v1 = iconst.i32 17
+  v2 = udiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   movl    $17, %r8d
+;   div     %eax, %edx, %r8d, %eax, %edx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   movl $0x11, %r8d
+;   divl %r8d ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_imm(i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 17
+  v2 = udiv v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   movl    $17, %r8d
+;   div     %rax, %rdx, %r8, %rax, %rdx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   movl $0x11, %r8d
+;   divq %r8 ; trap: int_divz
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
--- a/cranelift/filetests/filetests/isa/x64/udiv.clif
+++ b/cranelift/filetests/filetests/isa/x64/udiv.clif
@@ -12,7 +12,7 @@ block0(v0: i8, v1: i8):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movzbl  %dil, %eax
-;   div     %al, (none), %sil, %al, (none)
+;   div     %al, %sil, %al
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -39,7 +39,7 @@ block0(v0: i16, v1: i16):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %ax, %dx, %si, %ax, %dx
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -51,7 +51,7 @@ block0(v0: i16, v1: i16):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   divw %si ; trap: int_divz
 ;   movq %rbp, %rsp
 ;   popq %rbp
@@ -68,7 +68,7 @@ block0(v0: i32, v1: i32):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %eax, %edx, %esi, %eax, %edx
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -80,7 +80,7 @@ block0(v0: i32, v1: i32):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   divl %esi ; trap: int_divz
 ;   movq %rbp, %rsp
 ;   popq %rbp
@@ -97,7 +97,7 @@ block0(v0: i64, v1: i64):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %rax, %rdx, %rsi, %rax, %rdx
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -109,7 +109,7 @@ block0(v0: i64, v1: i64):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   divq %rsi ; trap: int_divz
 ;   movq %rbp, %rsp
 ;   popq %rbp
--- a/cranelift/filetests/filetests/isa/x64/udivrem.clif
+++ b/cranelift/filetests/filetests/isa/x64/udivrem.clif
@@ -15,13 +15,13 @@ block0(v0: i8, v1: i8):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movzbl  %dil, %eax
-;   div     %al, (none), %sil, %al, (none)
-;   movq    %rax, %rcx
+;   div     %al, %sil, %al
+;   movq    %rax, %r11
 ;   movzbl  %dil, %eax
-;   div     %al, (none), %sil, %al, (none)
+;   div     %al, %sil, %al
 ;   movq    %rax, %rdx
 ;   shrq    $8, %rdx, %rdx
-;   movq    %rcx, %rax
+;   movq    %r11, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -33,12 +33,12 @@ block0(v0: i8, v1: i8):
 ; block1: ; offset 0x4
 ;   movzbl %dil, %eax
 ;   divb %sil ; trap: int_divz
-;   movq %rax, %rcx
+;   movq %rax, %r11
 ;   movzbl %dil, %eax
 ;   divb %sil ; trap: int_divz
 ;   movq %rax, %rdx
 ;   shrq $8, %rdx
-;   movq %rcx, %rax
+;   movq %r11, %rax
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
@@ -54,15 +54,14 @@ block0(v0: i16, v1: i16):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   movq    %rdi, %rax
 ;   div     %ax, %dx, %si, %ax, %dx
-;   movq    %rdi, %rcx
-;   movq    %rax, %r8
-;   movl    $0, %edx
-;   movq    %rcx, %rax
+;   movq    %rax, %rcx
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %ax, %dx, %si, %ax, %dx
-;   movq    %r8, %rax
+;   movq    %rcx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -72,15 +71,14 @@ block0(v0: i16, v1: i16):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   movq %rdi, %rax
 ;   divw %si ; trap: int_divz
-;   movq %rdi, %rcx
-;   movq %rax, %r8
-;   movl $0, %edx
-;   movq %rcx, %rax
+;   movq %rax, %rcx
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
 ;   divw %si ; trap: int_divz
-;   movq %r8, %rax
+;   movq %rcx, %rax
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
@@ -96,15 +94,14 @@ block0(v0: i32, v1: i32):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   movq    %rdi, %rax
 ;   div     %eax, %edx, %esi, %eax, %edx
-;   movq    %rdi, %rcx
-;   movq    %rax, %r8
-;   movl    $0, %edx
-;   movq    %rcx, %rax
+;   movq    %rax, %rcx
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %eax, %edx, %esi, %eax, %edx
-;   movq    %r8, %rax
+;   movq    %rcx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -114,15 +111,14 @@ block0(v0: i32, v1: i32):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   movq %rdi, %rax
 ;   divl %esi ; trap: int_divz
-;   movq %rdi, %rcx
-;   movq %rax, %r8
-;   movl $0, %edx
-;   movq %rcx, %rax
+;   movq %rax, %rcx
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
 ;   divl %esi ; trap: int_divz
-;   movq %r8, %rax
+;   movq %rcx, %rax
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
@@ -138,15 +134,14 @@ block0(v0: i64, v1: i64):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   movq    %rdi, %rax
 ;   div     %rax, %rdx, %rsi, %rax, %rdx
-;   movq    %rdi, %rcx
-;   movq    %rax, %r8
-;   movl    $0, %edx
-;   movq    %rcx, %rax
+;   movq    %rax, %rcx
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %rax, %rdx, %rsi, %rax, %rdx
-;   movq    %r8, %rax
+;   movq    %rcx, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -156,15 +151,14 @@ block0(v0: i64, v1: i64):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   movq %rdi, %rax
 ;   divq %rsi ; trap: int_divz
-;   movq %rdi, %rcx
-;   movq %rax, %r8
-;   movl $0, %edx
-;   movq %rcx, %rax
+;   movq %rax, %rcx
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
 ;   divq %rsi ; trap: int_divz
-;   movq %r8, %rax
+;   movq %rcx, %rax
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
--- a/cranelift/filetests/filetests/isa/x64/urem-checked.clif
+++ b/cranelift/filetests/filetests/isa/x64/urem-checked.clif
@@ -0,0 +1,280 @@
+test compile precise-output
+set avoid_div_traps=true
+target x86_64
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = urem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movzbl  %dil, %eax
+;   testb   %sil, %sil
+;   jnz ; ud2 int_divz ;
+;   div     %al, %sil, %al
+;   shrq    $8, %rax, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movzbl %dil, %eax
+;   testb %sil, %sil
+;   jne 0x13
+;   ud2 ; trap: int_divz
+;   divb %sil ; trap: int_divz
+;   shrq $8, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f2(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = urem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   testw   %si, %si
+;   jnz ; ud2 int_divz ;
+;   div     %ax, %dx, %si, %ax, %dx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   testw %si, %si
+;   jne 0x15
+;   ud2 ; trap: int_divz
+;   divw %si ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f3(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = urem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   testl   %esi, %esi
+;   jnz ; ud2 int_divz ;
+;   div     %eax, %edx, %esi, %eax, %edx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   testl %esi, %esi
+;   jne 0x14
+;   ud2 ; trap: int_divz
+;   divl %esi ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %f4(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = urem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   testq   %rsi, %rsi
+;   jnz ; ud2 int_divz ;
+;   div     %rax, %rdx, %rsi, %rax, %rdx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   testq %rsi, %rsi
+;   jne 0x15
+;   ud2 ; trap: int_divz
+;   divq %rsi ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i8_imm(i8) -> i8 {
+block0(v0: i8):
+  v1 = iconst.i8 17
+  v2 = urem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movzbl  %dil, %eax
+;   movl    $17, %edx
+;   div     %al, %dl, %al
+;   shrq    $8, %rax, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movzbl %dil, %eax
+;   movl $0x11, %edx
+;   divb %dl ; trap: int_divz
+;   shrq $8, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i16_imm(i16) -> i16 {
+block0(v0: i16):
+  v1 = iconst.i16 17
+  v2 = urem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   movl    $17, %r8d
+;   div     %ax, %dx, %r8w, %ax, %dx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   movl $0x11, %r8d
+;   divw %r8w ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i32_imm(i32) -> i32 {
+block0(v0: i32):
+  v1 = iconst.i32 17
+  v2 = urem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   movl    $17, %r8d
+;   div     %eax, %edx, %r8d, %eax, %edx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   movl $0x11, %r8d
+;   divl %r8d ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_imm(i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 17
+  v2 = urem v0, v1
+  return v2
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   xorq    %rdx, %rdx, %rdx
+;   movl    $17, %r8d
+;   div     %rax, %rdx, %r8, %rax, %rdx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+; 
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movq %rdi, %rax
+;   xorq %rdx, %rdx
+;   movl $0x11, %r8d
+;   divq %r8 ; trap: int_divz
+;   movq %rdx, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
--- a/cranelift/filetests/filetests/isa/x64/urem.clif
+++ b/cranelift/filetests/filetests/isa/x64/urem.clif
@@ -12,7 +12,7 @@ block0(v0: i8, v1: i8):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movzbl  %dil, %eax
-;   div     %al, (none), %sil, %al, (none)
+;   div     %al, %sil, %al
 ;   shrq    $8, %rax, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -41,7 +41,7 @@ block0(v0: i16, v1: i16):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %ax, %dx, %si, %ax, %dx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
@@ -54,7 +54,7 @@ block0(v0: i16, v1: i16):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   divw %si ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp
@@ -72,7 +72,7 @@ block0(v0: i32, v1: i32):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %eax, %edx, %esi, %eax, %edx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
@@ -85,7 +85,7 @@ block0(v0: i32, v1: i32):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   divl %esi ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp
@@ -103,7 +103,7 @@ block0(v0: i64, v1: i64):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movq    %rdi, %rax
-;   movl    $0, %edx
+;   xorq    %rdx, %rdx, %rdx
 ;   div     %rax, %rdx, %rsi, %rax, %rdx
 ;   movq    %rdx, %rax
 ;   movq    %rbp, %rsp
@@ -116,7 +116,7 @@ block0(v0: i64, v1: i64):
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
 ;   movq %rdi, %rax
-;   movl $0, %edx
+;   xorq %rdx, %rdx
 ;   divq %rsi ; trap: int_divz
 ;   movq %rdx, %rax
 ;   movq %rbp, %rsp