aarch64: Migrate {s,u}{div,rem} to ISLE (#3572)

* aarch64: Migrate {s,u}{div,rem} to ISLE

  This commit migrates four different instructions at once to ISLE:

  * `sdiv`
  * `udiv`
  * `srem`
  * `urem`

  These all share similar codegen and center around the `div` instruction to use internally. The main feature of these was to model the manual traps since the `div` instruction doesn't trap on overflow, instead requiring manual checks to adhere to the semantics of the instruction itself.

  While I was here I went ahead and implemented an optimization for these instructions when the right-hand-side is a constant with a known value. For `udiv`, `srem`, and `urem` if the right-hand-side is a nonzero constant then the checks for traps can be skipped entirely. For `sdiv` if the constant is not 0 and not -1 then additionally all checks can be elided. Finally if the right-hand-side of `sdiv` is -1 the zero-check is elided, but it still needs a check for `i64::MIN` on the left-hand-side and currently there's a TODO where `-1` is still checked too.

* Rebasing and review conflicts
2021-12-13 17:27:11 -06:00
parent f1225dfd93
commit 20e090b114
12 changed files with 567 additions and 215 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1307,6 +1307,12 @@
 (decl imm12_from_u64 (Imm12) u64)
 (extern extractor imm12_from_u64 imm12_from_u64)

+(decl u8_into_uimm5 (u8) UImm5)
+(extern constructor u8_into_uimm5 u8_into_uimm5)
+
+(decl u8_into_imm12 (u8) Imm12)
+(extern constructor u8_into_imm12 u8_into_imm12)
+
 (decl imm12_from_negated_u64 (Imm12) u64)
 (extern extractor imm12_from_negated_u64 imm12_from_negated_u64)

@@ -1339,6 +1345,15 @@
 (decl get_extended_op (ExtendedValue) ExtendOp)
 (extern constructor get_extended_op get_extended_op)

+(decl nzcv (bool bool bool bool) NZCV)
+(extern constructor nzcv nzcv)
+
+(decl cond_br_zero (Reg) CondBrKind)
+(extern constructor cond_br_zero cond_br_zero)
+
+(decl cond_br_cond (Cond) CondBrKind)
+(extern constructor cond_br_cond cond_br_cond)
+
 ;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Emit an instruction.
@@ -1352,6 +1367,9 @@
 (decl zero_reg () Reg)
 (extern constructor zero_reg zero_reg)

+(decl writable_zero_reg () WritableReg)
+(extern constructor writable_zero_reg writable_zero_reg)
+
 ;; Helper for emitting `MInst.MovZ` instructions.
 (decl movz (MoveWideConst OperandSize) Reg)
 (rule (movz imm size)
@@ -1543,3 +1561,41 @@

 ;; 64-bit passthrough.
 (rule (put_in_reg_zext64 val @ (value_type $I64)) (put_in_reg val))
+
+;; Misc instruction helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl trap_if_zero_divisor (Reg) Reg)
+(rule (trap_if_zero_divisor reg)
+      (let ((_ Unit (emit (MInst.TrapIf (cond_br_zero reg) (trap_code_division_by_zero)))))
+        reg))
+
+(decl size_from_ty (Type) OperandSize)
+(rule (size_from_ty (fits_in_32 _ty)) (OperandSize.Size32))
+(rule (size_from_ty $I64) (OperandSize.Size64))
+
+;; Check for signed overflow. The only case is min_value / -1.
+;; The following checks must be done in 32-bit or 64-bit, depending
+;; on the input type.
+(decl trap_if_div_overflow (Type Reg Reg) Reg)
+(rule (trap_if_div_overflow ty x y)
+      (let (
+          ;; Check RHS is -1.
+          (_1 Unit (emit (MInst.AluRRImm12 (adds_op ty) (writable_zero_reg) y (u8_into_imm12 1))))
+
+          ;; Check LHS is min_value, by subtracting 1 and branching if
+          ;; there is overflow.
+          (_2 Unit (emit (MInst.CCmpImm (size_from_ty ty)
+                                        x
+                                        (u8_into_uimm5 1)
+                                        (nzcv $false $false $false $false)
+                                        (Cond.Eq))))
+          (_3 Unit (emit (MInst.TrapIf (cond_br_cond (Cond.Vs))
+                                      (trap_code_integer_overflow))))
+        )
+        x))
+
+;; Helper to use either a 32 or 64-bit adds depending on the input type.
+(decl adds_op (Type) ALUOp)
+(rule (adds_op (fits_in_32 _ty)) (ALUOp.AddS32))
+(rule (adds_op $I64) (ALUOp.AddS64))
+
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -398,3 +398,107 @@
        )
        (value_reg result)))

+;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; TODO: Add UDiv32 to implement 32-bit directly, rather
+;; than extending the input.
+;;
+;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of
+;; CLIF's `udiv` the check for zero needs to be manually performed.
+(rule (lower (has_type (fits_in_64 ty) (udiv x y)))
+      (value_reg (alu_rrr (ALUOp.UDiv64)
+                          (put_in_reg_zext64 x)
+                          (put_nonzero_in_reg_zext64 y))))
+
+;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
+(decl put_nonzero_in_reg_zext64 (Value) Reg)
+(rule (put_nonzero_in_reg_zext64 val)
+      (trap_if_zero_divisor (put_in_reg_zext64 val)))
+
+;; Special case where if a `Value` is known to be nonzero we can trivially
+;; move it into a register.
+(rule (put_nonzero_in_reg_zext64 (and (value_type ty)
+                                      (def_inst (iconst (nonzero_u64_from_imm64 n)))))
+      (imm ty n))
+
+;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; TODO: Add SDiv32 to implement 32-bit directly, rather
+;; than extending the input.
+;;
+;; The sequence of checks here should look like:
+;;
+;;   cbnz rm, #8
+;;   udf ; divide by zero
+;;   cmn rm, 1
+;;   ccmp rn, 1, #nzcv, eq
+;;   b.vc #8
+;;   udf ; signed overflow
+;;
+;; Note that the `div` instruction does not trap on divide by zero or overflow,
+;; so checks need to be manually inserted.
+;;
+;; TODO: if `y` is -1 then a check that `x` is not INT_MIN is all that's
+;; necessary, but right now `y` is checked to not be -1 as well.
+(rule (lower (has_type (fits_in_64 ty) (sdiv x y)))
+      (let (
+          (x64 Reg (put_in_reg_sext64 x))
+          (y64 Reg (put_nonzero_in_reg_sext64 y))
+          (valid_x64 Reg (trap_if_div_overflow ty x64 y64))
+          (result Reg (alu_rrr (ALUOp.SDiv64) valid_x64 y64))
+        )
+        (value_reg result)))
+
+;; Helper for extracting an immediate that's not 0 and not -1 from an imm64.
+(decl safe_divisor_from_imm64 (u64) Imm64)
+(extern extractor safe_divisor_from_imm64 safe_divisor_from_imm64)
+
+;; Special case for `sdiv` where no checks are needed due to division by a
+;; constant meaning the checks are always passed.
+(rule (lower (has_type (fits_in_64 ty) (sdiv x (def_inst (iconst (safe_divisor_from_imm64 y))))))
+      (value_reg (alu_rrr (ALUOp.SDiv64)
+                          (put_in_reg_sext64 x)
+                          (imm ty y))))
+
+;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
+(decl put_nonzero_in_reg_sext64 (Value) Reg)
+(rule (put_nonzero_in_reg_sext64 val)
+      (trap_if_zero_divisor (put_in_reg_sext64 val)))
+
+;; Note that this has a special case where if the `Value` is a constant that's
+;; not zero we can skip the zero check.
+(rule (put_nonzero_in_reg_sext64 (and (value_type ty)
+                                      (def_inst (iconst (nonzero_u64_from_imm64 n)))))
+      (imm ty n))
+
+;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Remainder (x % y) is implemented as:
+;;
+;;   tmp = x / y
+;;   result = x - (tmp*y)
+;;
+;; use 'result' for tmp and you have:
+;;
+;;   cbnz y, #8         ; branch over trap
+;;   udf                ; divide by zero
+;;   div rd, x, y       ; rd = x / y
+;;   msub rd, rd, y, x  ; rd = x - rd * y
+
+(rule (lower (has_type (fits_in_64 ty) (urem x y)))
+      (let (
+          (x64 Reg (put_in_reg_zext64 x))
+          (y64 Reg (put_nonzero_in_reg_zext64 y))
+          (div Reg (alu_rrr (ALUOp.UDiv64) x64 y64))
+          (result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
+        )
+        (value_reg result)))
+
+(rule (lower (has_type (fits_in_64 ty) (srem x y)))
+      (let (
+          (x64 Reg (put_in_reg_sext64 x))
+          (y64 Reg (put_nonzero_in_reg_sext64 y))
+          (div Reg (alu_rrr (ALUOp.SDiv64) x64 y64))
+          (result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
+        )
+        (value_reg result)))
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -5,10 +5,10 @@ pub mod generated_code;

 // Types that the generated ISLE code uses via `use super::*`.
 use super::{
-    zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, AtomicRmwOp, BranchTarget, CallIndInfo,
-    CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, Imm12, ImmLogic, ImmShift, Inst as MInst,
-    JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg,
-    ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
+    writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, AtomicRmwOp, BranchTarget,
+    CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, Imm12, ImmLogic, ImmShift,
+    Inst as MInst, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize,
+    PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
 };
 use crate::isa::aarch64::settings as aarch64_settings;
 use crate::machinst::isle::*;
@@ -244,4 +244,35 @@ where
    fn emit(&mut self, inst: &MInst) -> Unit {
        self.emitted_insts.push(inst.clone());
    }
+
+    fn cond_br_zero(&mut self, reg: Reg) -> CondBrKind {
+        CondBrKind::Zero(reg)
+    }
+
+    fn cond_br_cond(&mut self, cond: &Cond) -> CondBrKind {
+        CondBrKind::Cond(*cond)
+    }
+
+    fn nzcv(&mut self, n: bool, z: bool, c: bool, v: bool) -> NZCV {
+        NZCV::new(n, z, c, v)
+    }
+
+    fn u8_into_uimm5(&mut self, x: u8) -> UImm5 {
+        UImm5::maybe_from_u8(x).unwrap()
+    }
+
+    fn u8_into_imm12(&mut self, x: u8) -> Imm12 {
+        Imm12::maybe_from_u64(x.into()).unwrap()
+    }
+
+    fn writable_zero_reg(&mut self) -> WritableReg {
+        writable_zero_reg()
+    }
+
+    fn safe_divisor_from_imm64(&mut self, val: Imm64) -> Option<u64> {
+        match val.bits() {
+            0 | -1 => None,
+            n => Some(n as u64),
+        }
+    }
 }
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
-src/prelude.isle 9bd1fcb6a3604a24cf2e05e6b7eb04dcb3b9dc8fa9a2f1c8f29c25b6e3bf7f679b3b1b72dff87501497b72bc30fc92fd755b898db7e03f380235fae931b6a74b
-src/isa/aarch64/inst.isle 6e042ec14166fceae4b7133f681fdf604e20a2997e1d60f797e40acd683ccb34e33376189f6b7ed2f5eb441dc61d592cad2592256dfea51296330752181b9403
-src/isa/aarch64/lower.isle 64a725771537f69c445f44c728e04bffd8a715d6a4d87a5a2bf2e89714ee290b7497c5ca8b335bdddd775f6734be03318ff9aa67e2e4068949ebae06b0902b3f
+src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
+src/isa/aarch64/inst.isle cec03d88680e8da01424eecc05ef73a48e4055d29fe841fceaa3e6ea4e7cb9abb887401bb5acb2e058c9fc993188640990b699e88272d62e243781b231cdfb0d
+src/isa/aarch64/lower.isle e1ae53adc953ad395feeecd8edc8bcfd288491a4e4a71510e5f06e221f767518c6e060ff0d795c7c2510b7d898cc8b9bc0313906412e0176605c33427926f828
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
@@ -42,6 +42,7 @@ pub trait Context {
    fn writable_reg_to_reg(&mut self, arg0: WritableReg) -> Reg;
    fn u8_from_uimm8(&mut self, arg0: Uimm8) -> u8;
    fn u64_from_imm64(&mut self, arg0: Imm64) -> u64;
+    fn nonzero_u64_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
    fn u64_from_ieee32(&mut self, arg0: Ieee32) -> u64;
    fn u64_from_ieee64(&mut self, arg0: Ieee64) -> u64;
    fn inst_results(&mut self, arg0: Inst) -> ValueSlice;
@@ -50,29 +51,38 @@ pub trait Context {
    fn value_type(&mut self, arg0: Value) -> Type;
    fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>;
    fn def_inst(&mut self, arg0: Value) -> Option<Inst>;
+    fn trap_code_division_by_zero(&mut self) -> TrapCode;
+    fn trap_code_integer_overflow(&mut self) -> TrapCode;
    fn move_wide_const_from_u64(&mut self, arg0: u64) -> Option<MoveWideConst>;
    fn move_wide_const_from_negated_u64(&mut self, arg0: u64) -> Option<MoveWideConst>;
    fn imm_logic_from_u64(&mut self, arg0: u64) -> Option<ImmLogic>;
    fn imm_shift_from_u8(&mut self, arg0: u8) -> ImmShift;
    fn imm12_from_u64(&mut self, arg0: u64) -> Option<Imm12>;
+    fn u8_into_uimm5(&mut self, arg0: u8) -> UImm5;
+    fn u8_into_imm12(&mut self, arg0: u8) -> Imm12;
    fn imm12_from_negated_u64(&mut self, arg0: u64) -> Option<Imm12>;
    fn lshl_from_imm64(&mut self, arg0: Imm64, arg1: Type) -> Option<ShiftOpAndAmt>;
    fn integral_ty(&mut self, arg0: Type) -> Option<Type>;
    fn extended_value_from_value(&mut self, arg0: Value) -> Option<ExtendedValue>;
    fn put_extended_in_reg(&mut self, arg0: &ExtendedValue) -> Reg;
    fn get_extended_op(&mut self, arg0: &ExtendedValue) -> ExtendOp;
+    fn nzcv(&mut self, arg0: bool, arg1: bool, arg2: bool, arg3: bool) -> NZCV;
+    fn cond_br_zero(&mut self, arg0: Reg) -> CondBrKind;
+    fn cond_br_cond(&mut self, arg0: &Cond) -> CondBrKind;
    fn emit(&mut self, arg0: &MInst) -> Unit;
    fn zero_reg(&mut self) -> Reg;
+    fn writable_zero_reg(&mut self) -> WritableReg;
    fn load_constant64_full(&mut self, arg0: u64) -> Reg;
+    fn safe_divisor_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
 }

-/// Internal type ProducesFlags: defined at src/prelude.isle line 242.
+/// Internal type ProducesFlags: defined at src/prelude.isle line 246.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
    ProducesFlags { inst: MInst, result: Reg },
 }

-/// Internal type ConsumesFlags: defined at src/prelude.isle line 245.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 249.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
    ConsumesFlags { inst: MInst, result: Reg },
@@ -986,7 +996,7 @@ pub fn constructor_with_flags<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 255.
+            // Rule at src/prelude.isle line 259.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1014,7 +1024,7 @@ pub fn constructor_with_flags_1<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 263.
+            // Rule at src/prelude.isle line 267.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            return Some(pattern3_1);
@@ -1048,7 +1058,7 @@ pub fn constructor_with_flags_2<C: Context>(
                result: pattern5_1,
            } = pattern4_0
            {
-                // Rule at src/prelude.isle line 273.
+                // Rule at src/prelude.isle line 277.
                let expr0_0 = C::emit(ctx, &pattern1_0);
                let expr1_0 = C::emit(ctx, &pattern3_0);
                let expr2_0 = C::emit(ctx, &pattern5_0);
@@ -1104,7 +1114,7 @@ pub fn constructor_movz<C: Context>(
 ) -> Option<Reg> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1357.
+    // Rule at src/isa/aarch64/inst.isle line 1375.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::MovZ {
@@ -1125,7 +1135,7 @@ pub fn constructor_movn<C: Context>(
 ) -> Option<Reg> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1364.
+    // Rule at src/isa/aarch64/inst.isle line 1382.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::MovN {
@@ -1148,7 +1158,7 @@ pub fn constructor_alu_rr_imm_logic<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1371.
+    // Rule at src/isa/aarch64/inst.isle line 1389.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRImmLogic {
@@ -1172,7 +1182,7 @@ pub fn constructor_alu_rr_imm_shift<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1378.
+    // Rule at src/isa/aarch64/inst.isle line 1396.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRImmShift {
@@ -1196,7 +1206,7 @@ pub fn constructor_alu_rrr<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1385.
+    // Rule at src/isa/aarch64/inst.isle line 1403.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRR {
@@ -1222,7 +1232,7 @@ pub fn constructor_vec_rrr<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1392.
+    // Rule at src/isa/aarch64/inst.isle line 1410.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecRRR {
@@ -1247,7 +1257,7 @@ pub fn constructor_alu_rr_imm12<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1399.
+    // Rule at src/isa/aarch64/inst.isle line 1417.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRImm12 {
@@ -1273,7 +1283,7 @@ pub fn constructor_alu_rrr_shift<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1406.
+    // Rule at src/isa/aarch64/inst.isle line 1424.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRRShift {
@@ -1300,7 +1310,7 @@ pub fn constructor_alu_rrr_extend<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1413.
+    // Rule at src/isa/aarch64/inst.isle line 1431.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRRExtend {
@@ -1325,7 +1335,7 @@ pub fn constructor_alu_rr_extend_reg<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1421.
+    // Rule at src/isa/aarch64/inst.isle line 1439.
    let expr0_0 = C::put_extended_in_reg(ctx, pattern2_0);
    let expr1_0 = C::get_extended_op(ctx, pattern2_0);
    let expr2_0 = constructor_alu_rrr_extend(ctx, pattern0_0, pattern1_0, expr0_0, &expr1_0)?;
@@ -1344,7 +1354,7 @@ pub fn constructor_alu_rrrr<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1428.
+    // Rule at src/isa/aarch64/inst.isle line 1446.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRRR {
@@ -1367,7 +1377,7 @@ pub fn constructor_add64_with_flags<C: Context>(
 ) -> Option<ProducesFlags> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1435.
+    // Rule at src/isa/aarch64/inst.isle line 1453.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = ALUOp::AddS64;
@@ -1389,7 +1399,7 @@ pub fn constructor_add64_with_flags<C: Context>(
 pub fn constructor_adc64<C: Context>(ctx: &mut C, arg0: Reg, arg1: Reg) -> Option<ConsumesFlags> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1442.
+    // Rule at src/isa/aarch64/inst.isle line 1460.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = ALUOp::Adc64;
@@ -1415,7 +1425,7 @@ pub fn constructor_sub64_with_flags<C: Context>(
 ) -> Option<ProducesFlags> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1449.
+    // Rule at src/isa/aarch64/inst.isle line 1467.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = ALUOp::SubS64;
@@ -1437,7 +1447,7 @@ pub fn constructor_sub64_with_flags<C: Context>(
 pub fn constructor_sbc64<C: Context>(ctx: &mut C, arg0: Reg, arg1: Reg) -> Option<ConsumesFlags> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1456.
+    // Rule at src/isa/aarch64/inst.isle line 1474.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = ALUOp::Sbc64;
@@ -1465,7 +1475,7 @@ pub fn constructor_vec_misc<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1463.
+    // Rule at src/isa/aarch64/inst.isle line 1481.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecMisc {
@@ -1491,7 +1501,7 @@ pub fn constructor_vec_rrr_long<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1470.
+    // Rule at src/isa/aarch64/inst.isle line 1488.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecRRRLong {
@@ -1520,7 +1530,7 @@ pub fn constructor_vec_rrrr_long<C: Context>(
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
    let pattern4_0 = arg4;
-    // Rule at src/isa/aarch64/inst.isle line 1480.
+    // Rule at src/isa/aarch64/inst.isle line 1498.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::FpuMove128 {
@@ -1550,7 +1560,7 @@ pub fn constructor_vec_rr_narrow<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1488.
+    // Rule at src/isa/aarch64/inst.isle line 1506.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecRRNarrow {
@@ -1574,7 +1584,7 @@ pub fn constructor_vec_rr_long<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1495.
+    // Rule at src/isa/aarch64/inst.isle line 1513.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecRRLong {
@@ -1594,25 +1604,25 @@ pub fn constructor_imm<C: Context>(ctx: &mut C, arg0: Type, arg1: u64) -> Option
    if let Some(pattern1_0) = C::integral_ty(ctx, pattern0_0) {
        let pattern2_0 = arg1;
        if let Some(pattern3_0) = C::imm_logic_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1513.
+            // Rule at src/isa/aarch64/inst.isle line 1531.
            let expr0_0 = ALUOp::Orr64;
            let expr1_0 = C::zero_reg(ctx);
            let expr2_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, expr1_0, pattern3_0)?;
            return Some(expr2_0);
        }
        if let Some(pattern3_0) = C::move_wide_const_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1505.
+            // Rule at src/isa/aarch64/inst.isle line 1523.
            let expr0_0 = OperandSize::Size64;
            let expr1_0 = constructor_movz(ctx, pattern3_0, &expr0_0)?;
            return Some(expr1_0);
        }
        if let Some(pattern3_0) = C::move_wide_const_from_negated_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1509.
+            // Rule at src/isa/aarch64/inst.isle line 1527.
            let expr0_0 = OperandSize::Size64;
            let expr1_0 = constructor_movn(ctx, pattern3_0, &expr0_0)?;
            return Some(expr1_0);
        }
-        // Rule at src/isa/aarch64/inst.isle line 1520.
+        // Rule at src/isa/aarch64/inst.isle line 1538.
        let expr0_0 = C::load_constant64_full(ctx, pattern2_0);
        return Some(expr0_0);
    }
@@ -1624,12 +1634,12 @@ pub fn constructor_put_in_reg_sext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
    let pattern0_0 = arg0;
    let pattern1_0 = C::value_type(ctx, pattern0_0);
    if pattern1_0 == I64 {
-        // Rule at src/isa/aarch64/inst.isle line 1534.
+        // Rule at src/isa/aarch64/inst.isle line 1552.
        let expr0_0 = C::put_in_reg(ctx, pattern0_0);
        return Some(expr0_0);
    }
    if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) {
-        // Rule at src/isa/aarch64/inst.isle line 1527.
+        // Rule at src/isa/aarch64/inst.isle line 1545.
        let expr0_0: Type = I32;
        let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
        let expr2_0 = C::put_in_reg(ctx, pattern0_0);
@@ -1655,12 +1665,12 @@ pub fn constructor_put_in_reg_zext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
    let pattern0_0 = arg0;
    let pattern1_0 = C::value_type(ctx, pattern0_0);
    if pattern1_0 == I64 {
-        // Rule at src/isa/aarch64/inst.isle line 1545.
+        // Rule at src/isa/aarch64/inst.isle line 1563.
        let expr0_0 = C::put_in_reg(ctx, pattern0_0);
        return Some(expr0_0);
    }
    if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) {
-        // Rule at src/isa/aarch64/inst.isle line 1538.
+        // Rule at src/isa/aarch64/inst.isle line 1556.
        let expr0_0: Type = I32;
        let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
        let expr2_0 = C::put_in_reg(ctx, pattern0_0);
@@ -1681,6 +1691,102 @@ pub fn constructor_put_in_reg_zext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
    return None;
 }

+// Generated as internal constructor for term trap_if_zero_divisor.
+pub fn constructor_trap_if_zero_divisor<C: Context>(ctx: &mut C, arg0: Reg) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    // Rule at src/isa/aarch64/inst.isle line 1568.
+    let expr0_0 = C::cond_br_zero(ctx, pattern0_0);
+    let expr1_0 = C::trap_code_division_by_zero(ctx);
+    let expr2_0 = MInst::TrapIf {
+        kind: expr0_0,
+        trap_code: expr1_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    return Some(pattern0_0);
+}
+
+// Generated as internal constructor for term size_from_ty.
+pub fn constructor_size_from_ty<C: Context>(ctx: &mut C, arg0: Type) -> Option<OperandSize> {
+    let pattern0_0 = arg0;
+    if pattern0_0 == I64 {
+        // Rule at src/isa/aarch64/inst.isle line 1574.
+        let expr0_0 = OperandSize::Size64;
+        return Some(expr0_0);
+    }
+    if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) {
+        // Rule at src/isa/aarch64/inst.isle line 1573.
+        let expr0_0 = OperandSize::Size32;
+        return Some(expr0_0);
+    }
+    return None;
+}
+
+// Generated as internal constructor for term trap_if_div_overflow.
+pub fn constructor_trap_if_div_overflow<C: Context>(
+    ctx: &mut C,
+    arg0: Type,
+    arg1: Reg,
+    arg2: Reg,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    // Rule at src/isa/aarch64/inst.isle line 1580.
+    let expr0_0 = constructor_adds_op(ctx, pattern0_0)?;
+    let expr1_0 = C::writable_zero_reg(ctx);
+    let expr2_0: u8 = 1;
+    let expr3_0 = C::u8_into_imm12(ctx, expr2_0);
+    let expr4_0 = MInst::AluRRImm12 {
+        alu_op: expr0_0,
+        rd: expr1_0,
+        rn: pattern2_0,
+        imm12: expr3_0,
+    };
+    let expr5_0 = C::emit(ctx, &expr4_0);
+    let expr6_0 = constructor_size_from_ty(ctx, pattern0_0)?;
+    let expr7_0: u8 = 1;
+    let expr8_0 = C::u8_into_uimm5(ctx, expr7_0);
+    let expr9_0: bool = false;
+    let expr10_0: bool = false;
+    let expr11_0: bool = false;
+    let expr12_0: bool = false;
+    let expr13_0 = C::nzcv(ctx, expr9_0, expr10_0, expr11_0, expr12_0);
+    let expr14_0 = Cond::Eq;
+    let expr15_0 = MInst::CCmpImm {
+        size: expr6_0,
+        rn: pattern1_0,
+        imm: expr8_0,
+        nzcv: expr13_0,
+        cond: expr14_0,
+    };
+    let expr16_0 = C::emit(ctx, &expr15_0);
+    let expr17_0 = Cond::Vs;
+    let expr18_0 = C::cond_br_cond(ctx, &expr17_0);
+    let expr19_0 = C::trap_code_integer_overflow(ctx);
+    let expr20_0 = MInst::TrapIf {
+        kind: expr18_0,
+        trap_code: expr19_0,
+    };
+    let expr21_0 = C::emit(ctx, &expr20_0);
+    return Some(pattern1_0);
+}
+
+// Generated as internal constructor for term adds_op.
+pub fn constructor_adds_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<ALUOp> {
+    let pattern0_0 = arg0;
+    if pattern0_0 == I64 {
+        // Rule at src/isa/aarch64/inst.isle line 1600.
+        let expr0_0 = ALUOp::AddS64;
+        return Some(expr0_0);
+    }
+    if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) {
+        // Rule at src/isa/aarch64/inst.isle line 1599.
+        let expr0_0 = ALUOp::AddS32;
+        return Some(expr0_0);
+    }
+    return None;
+}
+
 // Generated as internal constructor for term lower.
 pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueRegs> {
    let pattern0_0 = arg0;
@@ -2843,6 +2949,85 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
                            let expr5_0 = C::value_reg(ctx, expr4_0);
                            return Some(expr5_0);
                        }
+                        &Opcode::Udiv => {
+                            let (pattern7_0, pattern7_1) =
+                                C::unpack_value_array_2(ctx, &pattern5_1);
+                            // Rule at src/isa/aarch64/lower.isle line 408.
+                            let expr0_0 = ALUOp::UDiv64;
+                            let expr1_0 = constructor_put_in_reg_zext64(ctx, pattern7_0)?;
+                            let expr2_0 = constructor_put_nonzero_in_reg_zext64(ctx, pattern7_1)?;
+                            let expr3_0 = constructor_alu_rrr(ctx, &expr0_0, expr1_0, expr2_0)?;
+                            let expr4_0 = C::value_reg(ctx, expr3_0);
+                            return Some(expr4_0);
+                        }
+                        &Opcode::Sdiv => {
+                            let (pattern7_0, pattern7_1) =
+                                C::unpack_value_array_2(ctx, &pattern5_1);
+                            if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) {
+                                let pattern9_0 = C::inst_data(ctx, pattern8_0);
+                                if let &InstructionData::UnaryImm {
+                                    opcode: ref pattern10_0,
+                                    imm: pattern10_1,
+                                } = &pattern9_0
+                                {
+                                    if let &Opcode::Iconst = &pattern10_0 {
+                                        if let Some(pattern12_0) =
+                                            C::safe_divisor_from_imm64(ctx, pattern10_1)
+                                        {
+                                            // Rule at src/isa/aarch64/lower.isle line 458.
+                                            let expr0_0 = ALUOp::SDiv64;
+                                            let expr1_0 =
+                                                constructor_put_in_reg_sext64(ctx, pattern7_0)?;
+                                            let expr2_0 =
+                                                constructor_imm(ctx, pattern3_0, pattern12_0)?;
+                                            let expr3_0 = constructor_alu_rrr(
+                                                ctx, &expr0_0, expr1_0, expr2_0,
+                                            )?;
+                                            let expr4_0 = C::value_reg(ctx, expr3_0);
+                                            return Some(expr4_0);
+                                        }
+                                    }
+                                }
+                            }
+                            // Rule at src/isa/aarch64/lower.isle line 443.
+                            let expr0_0 = constructor_put_in_reg_sext64(ctx, pattern7_0)?;
+                            let expr1_0 = constructor_put_nonzero_in_reg_sext64(ctx, pattern7_1)?;
+                            let expr2_0 = constructor_trap_if_div_overflow(
+                                ctx, pattern3_0, expr0_0, expr1_0,
+                            )?;
+                            let expr3_0 = ALUOp::SDiv64;
+                            let expr4_0 = constructor_alu_rrr(ctx, &expr3_0, expr2_0, expr1_0)?;
+                            let expr5_0 = C::value_reg(ctx, expr4_0);
+                            return Some(expr5_0);
+                        }
+                        &Opcode::Urem => {
+                            let (pattern7_0, pattern7_1) =
+                                C::unpack_value_array_2(ctx, &pattern5_1);
+                            // Rule at src/isa/aarch64/lower.isle line 488.
+                            let expr0_0 = constructor_put_in_reg_zext64(ctx, pattern7_0)?;
+                            let expr1_0 = constructor_put_nonzero_in_reg_zext64(ctx, pattern7_1)?;
+                            let expr2_0 = ALUOp::UDiv64;
+                            let expr3_0 = constructor_alu_rrr(ctx, &expr2_0, expr0_0, expr1_0)?;
+                            let expr4_0 = ALUOp3::MSub64;
+                            let expr5_0 =
+                                constructor_alu_rrrr(ctx, &expr4_0, expr3_0, expr1_0, expr0_0)?;
+                            let expr6_0 = C::value_reg(ctx, expr5_0);
+                            return Some(expr6_0);
+                        }
+                        &Opcode::Srem => {
+                            let (pattern7_0, pattern7_1) =
+                                C::unpack_value_array_2(ctx, &pattern5_1);
+                            // Rule at src/isa/aarch64/lower.isle line 497.
+                            let expr0_0 = constructor_put_in_reg_sext64(ctx, pattern7_0)?;
+                            let expr1_0 = constructor_put_nonzero_in_reg_sext64(ctx, pattern7_1)?;
+                            let expr2_0 = ALUOp::SDiv64;
+                            let expr3_0 = constructor_alu_rrr(ctx, &expr2_0, expr0_0, expr1_0)?;
+                            let expr4_0 = ALUOp3::MSub64;
+                            let expr5_0 =
+                                constructor_alu_rrrr(ctx, &expr4_0, expr3_0, expr1_0, expr0_0)?;
+                            let expr6_0 = C::value_reg(ctx, expr5_0);
+                            return Some(expr6_0);
+                        }
                        _ => {}
                    }
                }
@@ -3015,3 +3200,55 @@ pub fn constructor_madd_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<ALUOp3
    }
    return None;
 }
+
+// Generated as internal constructor for term put_nonzero_in_reg_zext64: returns a register for divisor `arg0`, routed through `trap_if_zero_divisor` unless `arg0` is a nonzero `iconst`.
+pub fn constructor_put_nonzero_in_reg_zext64<C: Context>(ctx: &mut C, arg0: Value) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = C::value_type(ctx, pattern0_0);
+    if let Some(pattern2_0) = C::def_inst(ctx, pattern0_0) {
+        let pattern3_0 = C::inst_data(ctx, pattern2_0);
+        if let &InstructionData::UnaryImm {
+            opcode: ref pattern4_0,
+            imm: pattern4_1,
+        } = &pattern3_0
+        {
+            if let &Opcode::Iconst = &pattern4_0 {
+                if let Some(pattern6_0) = C::nonzero_u64_from_imm64(ctx, pattern4_1) {
+                    // Rule at src/isa/aarch64/lower.isle line 420: divisor is an `iconst` with a nonzero immediate, so this path calls no zero-divisor helper.
+                    let expr0_0 = constructor_imm(ctx, pattern1_0, pattern6_0)?; // NOTE(review): the u64 immediate is forwarded with the value's own type and no explicit zero-extension of a narrow constant is visible here -- confirm `imm` provides it
+                    return Some(expr0_0);
+                }
+            }
+        }
+    }
+    // Rule at src/isa/aarch64/lower.isle line 415: fallback for every value the constant rule did not accept (including a zero `iconst`).
+    let expr0_0 = constructor_put_in_reg_zext64(ctx, pattern0_0)?;
+    let expr1_0 = constructor_trap_if_zero_divisor(ctx, expr0_0)?; // callers receive the helper's result, not `expr0_0` directly
+    return Some(expr1_0);
+}
+
+// Generated as internal constructor for term put_nonzero_in_reg_sext64: returns a register for divisor `arg0`, routed through `trap_if_zero_divisor` unless `arg0` is a nonzero `iconst`.
+pub fn constructor_put_nonzero_in_reg_sext64<C: Context>(ctx: &mut C, arg0: Value) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = C::value_type(ctx, pattern0_0);
+    if let Some(pattern2_0) = C::def_inst(ctx, pattern0_0) {
+        let pattern3_0 = C::inst_data(ctx, pattern2_0);
+        if let &InstructionData::UnaryImm {
+            opcode: ref pattern4_0,
+            imm: pattern4_1,
+        } = &pattern3_0
+        {
+            if let &Opcode::Iconst = &pattern4_0 {
+                if let Some(pattern6_0) = C::nonzero_u64_from_imm64(ctx, pattern4_1) {
+                    // Rule at src/isa/aarch64/lower.isle line 470: divisor is an `iconst` with a nonzero immediate, so this path calls no zero-divisor helper.
+                    let expr0_0 = constructor_imm(ctx, pattern1_0, pattern6_0)?; // NOTE(review): the u64 immediate is forwarded with the value's own type and no explicit sign-extension of a narrow constant is visible here -- confirm `imm` provides it
+                    return Some(expr0_0);
+                }
+            }
+        }
+    }
+    // Rule at src/isa/aarch64/lower.isle line 465: fallback for every value the constant rule did not accept (including a zero `iconst`).
+    let expr0_0 = constructor_put_in_reg_sext64(ctx, pattern0_0)?;
+    let expr1_0 = constructor_trap_if_zero_divisor(ctx, expr0_0)?; // callers receive the helper's result, not `expr0_0` directly
+    return Some(expr1_0);
+}
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -75,135 +75,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

        Opcode::Umulhi | Opcode::Smulhi => implemented_in_isle(ctx),

-        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => {
-            let ty = ty.unwrap();
-
-            if ty.is_vector() || ty_bits(ty) > 64 {
-                return Err(CodegenError::Unsupported(format!(
-                    "{}: Unsupported type: {:?}",
-                    op, ty
-                )));
-            }
-
-            let is_signed = match op {
-                Opcode::Udiv | Opcode::Urem => false,
-                Opcode::Sdiv | Opcode::Srem => true,
-                _ => unreachable!(),
-            };
-            let is_rem = match op {
-                Opcode::Udiv | Opcode::Sdiv => false,
-                Opcode::Urem | Opcode::Srem => true,
-                _ => unreachable!(),
-            };
-            let narrow_mode = if is_signed {
-                NarrowValueMode::SignExtend64
-            } else {
-                NarrowValueMode::ZeroExtend64
-            };
-            // TODO: Add SDiv32 to implement 32-bit directly, rather
-            // than extending the input.
-            let div_op = if is_signed {
-                ALUOp::SDiv64
-            } else {
-                ALUOp::UDiv64
-            };
-
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-            let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
-            // The div instruction does not trap on divide by zero or signed overflow
-            // so checks are inserted below.
-            //
-            //   div rd, rn, rm
-            ctx.emit(Inst::AluRRR {
-                alu_op: div_op,
-                rd,
-                rn,
-                rm,
-            });
-
-            if is_rem {
-                // Remainder (rn % rm) is implemented as:
-                //
-                //   tmp = rn / rm
-                //   rd = rn - (tmp*rm)
-                //
-                // use 'rd' for tmp and you have:
-                //
-                //   div rd, rn, rm       ; rd = rn / rm
-                //   cbnz rm, #8          ; branch over trap
-                //   udf                  ; divide by zero
-                //   msub rd, rd, rm, rn  ; rd = rn - rd * rm
-
-                // Check for divide by 0.
-                let trap_code = TrapCode::IntegerDivisionByZero;
-                ctx.emit(Inst::TrapIf {
-                    trap_code,
-                    kind: CondBrKind::Zero(rm),
-                });
-
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MSub64,
-                    rd,
-                    rn: rd.to_reg(),
-                    rm,
-                    ra: rn,
-                });
-            } else {
-                if div_op == ALUOp::SDiv64 {
-                    //   cbnz rm, #8
-                    //   udf ; divide by zero
-                    //   cmn rm, 1
-                    //   ccmp rn, 1, #nzcv, eq
-                    //   b.vc #8
-                    //   udf ; signed overflow
-
-                    // Check for divide by 0.
-                    let trap_code = TrapCode::IntegerDivisionByZero;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Zero(rm),
-                    });
-
-                    // Check for signed overflow. The only case is min_value / -1.
-                    // The following checks must be done in 32-bit or 64-bit, depending
-                    // on the input type. Even though the initial div instruction is
-                    // always done in 64-bit currently.
-                    let size = OperandSize::from_ty(ty);
-                    // Check RHS is -1.
-                    ctx.emit(Inst::AluRRImm12 {
-                        alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
-                        rd: writable_zero_reg(),
-                        rn: rm,
-                        imm12: Imm12::maybe_from_u64(1).unwrap(),
-                    });
-                    // Check LHS is min_value, by subtracting 1 and branching if
-                    // there is overflow.
-                    ctx.emit(Inst::CCmpImm {
-                        size,
-                        rn,
-                        imm: UImm5::maybe_from_u8(1).unwrap(),
-                        nzcv: NZCV::new(false, false, false, false),
-                        cond: Cond::Eq,
-                    });
-                    let trap_code = TrapCode::IntegerOverflow;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Cond(Cond::Vs),
-                    });
-                } else {
-                    //   cbnz rm, #8
-                    //   udf ; divide by zero
-
-                    // Check for divide by 0.
-                    let trap_code = TrapCode::IntegerDivisionByZero;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Zero(rm),
-                    });
-                }
-            }
-        }
+        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => implemented_in_isle(ctx),

        Opcode::Uextend | Opcode::Sextend => {
            let output_ty = ty.unwrap();
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -12,7 +12,7 @@ use crate::isa::x64::inst::regs;
 use crate::isa::x64::settings as x64_settings;
 use crate::machinst::isle::*;
 use crate::{
-    ir::{immediates::*, types::*, Inst, InstructionData, Opcode, Value, ValueList},
+    ir::{immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueList},
    isa::x64::inst::{
        args::{
            Avx512Opcode, CmpOpcode, ExtMode, FcmpImm, Imm8Reg, RegMem, ShiftKind, SseOpcode, CC,
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
-src/prelude.isle 9bd1fcb6a3604a24cf2e05e6b7eb04dcb3b9dc8fa9a2f1c8f29c25b6e3bf7f679b3b1b72dff87501497b72bc30fc92fd755b898db7e03f380235fae931b6a74b
+src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
 src/isa/x64/inst.isle b151120df3c356ac697122a8557becd8857eb725851506e844edeb85d831d461322a96d280ad84f9a23518e1e4efb607aebc0e249004148675e4cc19e89f0655
 src/isa/x64/lower.isle c9b408df0a089fb4f207838973ac775b0f9b56c86f056867c28e6bae317873d3844f74f713f9acd6fed98d3d11a2f9d19d392fe5049169dad33b1fc703b9b766
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
@@ -42,6 +42,7 @@ pub trait Context {
    fn writable_reg_to_reg(&mut self, arg0: WritableReg) -> Reg;
    fn u8_from_uimm8(&mut self, arg0: Uimm8) -> u8;
    fn u64_from_imm64(&mut self, arg0: Imm64) -> u64;
+    fn nonzero_u64_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
    fn u64_from_ieee32(&mut self, arg0: Ieee32) -> u64;
    fn u64_from_ieee64(&mut self, arg0: Ieee64) -> u64;
    fn inst_results(&mut self, arg0: Inst) -> ValueSlice;
@@ -50,6 +51,8 @@ pub trait Context {
    fn value_type(&mut self, arg0: Value) -> Type;
    fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>;
    fn def_inst(&mut self, arg0: Value) -> Option<Inst>;
+    fn trap_code_division_by_zero(&mut self) -> TrapCode;
+    fn trap_code_integer_overflow(&mut self) -> TrapCode;
    fn operand_size_of_type(&mut self, arg0: Type) -> OperandSize;
    fn put_in_reg_mem(&mut self, arg0: Value) -> RegMem;
    fn encode_fcmp_imm(&mut self, arg0: &FcmpImm) -> u8;
@@ -68,13 +71,13 @@ pub trait Context {
    fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8;
 }

-/// Internal type ProducesFlags: defined at src/prelude.isle line 242.
+/// Internal type ProducesFlags: defined at src/prelude.isle line 246.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
    ProducesFlags { inst: MInst, result: Reg },
 }

-/// Internal type ConsumesFlags: defined at src/prelude.isle line 245.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 249.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
    ConsumesFlags { inst: MInst, result: Reg },
@@ -124,7 +127,7 @@ pub fn constructor_with_flags<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 255.
+            // Rule at src/prelude.isle line 259.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -152,7 +155,7 @@ pub fn constructor_with_flags_1<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 263.
+            // Rule at src/prelude.isle line 267.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            return Some(pattern3_1);
@@ -186,7 +189,7 @@ pub fn constructor_with_flags_2<C: Context>(
                result: pattern5_1,
            } = pattern4_0
            {
-                // Rule at src/prelude.isle line 273.
+                // Rule at src/prelude.isle line 277.
                let expr0_0 = C::emit(ctx, &pattern1_0);
                let expr1_0 = C::emit(ctx, &pattern3_0);
                let expr2_0 = C::emit(ctx, &pattern5_0);
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -210,6 +210,21 @@ macro_rules! isle_prelude_methods {
                Some(())
            }
        }
+
+        fn trap_code_division_by_zero(&mut self) -> TrapCode { // constant accessor; `self` is not read
+            TrapCode::IntegerDivisionByZero
+        }
+
+        fn trap_code_integer_overflow(&mut self) -> TrapCode { // constant accessor; `self` is not read
+            TrapCode::IntegerOverflow
+        }
+
+        fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option<u64> { // fallible extractor: `Some(bits as u64)` for a nonzero immediate, `None` for zero
+            match val.bits() {
+                0 => None, // a zero immediate is rejected
+                n => Some(n as u64), // plain `as` cast of the stored bits; no narrowing to any value type happens here
+            }
+        }
    };
 }

--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -190,6 +190,10 @@
 (decl u64_from_imm64 (u64) Imm64)
 (extern extractor infallible u64_from_imm64 u64_from_imm64)

+;; Extract a `u64` from an `Imm64`; the match fails when the immediate is zero.
+(decl nonzero_u64_from_imm64 (u64) Imm64)
+(extern extractor nonzero_u64_from_imm64 nonzero_u64_from_imm64)
+
 ;; Extract a `u64` from an `Ieee32`.
 (decl u64_from_ieee32 (u64) Ieee32)
 (extern extractor infallible u64_from_ieee32 u64_from_ieee32)
@@ -278,3 +282,10 @@
            (_z Unit (emit consumer_inst_2)))
        (value_regs consumer_result_1 consumer_result_2)))

+;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl trap_code_division_by_zero () TrapCode) ;; takes no arguments and yields a `TrapCode`
+(extern constructor trap_code_division_by_zero trap_code_division_by_zero) ;; bound to an external function of the same name
+
+(decl trap_code_integer_overflow () TrapCode) ;; takes no arguments and yields a `TrapCode`
+(extern constructor trap_code_integer_overflow trap_code_integer_overflow) ;; bound to an external function of the same name
--- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
@@ -54,12 +54,11 @@ block0(v0: i64, v1: i64):
  return v2
 }

-; check:  sdiv x2, x0, x1
-; nextln:  cbnz x1, 8 ; udf
+; check:   cbnz x1, 8 ; udf
 ; nextln:  adds xzr, x1, #1
 ; nextln:  ccmp x0, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
-; nextln:  mov x0, x2
+; nextln:  sdiv x0, x0, x1
 ; nextln:  ret

 function %f7(i64) -> i64 {
@@ -69,13 +68,8 @@ block0(v0: i64):
  return v2
 }

-; check:  movz x2, #2
-; nextln:  sdiv x1, x0, x2
-; nextln:  cbnz x2, 8 ; udf
-; nextln:  adds xzr, x2, #1
-; nextln:  ccmp x0, #1, #nzcv, eq
-; nextln:  b.vc 8 ; udf
-; nextln:  mov x0, x1
+; check:   orr x1, xzr, #2
+; nextln:  sdiv x0, x0, x1
 ; nextln:  ret

 function %f8(i64, i64) -> i64 {
@@ -84,8 +78,8 @@ block0(v0: i64, v1: i64):
  return v2
 }

-; check:  udiv x0, x0, x1
-; nextln:  cbnz x1, 8 ; udf
+; check:   cbnz x1, 8 ; udf
+; nextln:  udiv x0, x0, x1
 ; nextln:  ret

 function %f9(i64) -> i64 {
@@ -95,9 +89,8 @@ block0(v0: i64):
  return v2
 }

-; check:  movz x1, #2
+; check:   orr x1, xzr, #2
 ; nextln:  udiv x0, x0, x1
-; nextln:  cbnz x1, 8 ; udf
 ; nextln:  ret

 function %f10(i64, i64) -> i64 {
@@ -106,8 +99,8 @@ block0(v0: i64, v1: i64):
  return v2
 }

-; check:  sdiv x2, x0, x1
-; nextln:  cbnz x1, 8 ; udf
+; check:   cbnz x1, 8 ; udf
+; nextln:  sdiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret

@@ -117,8 +110,8 @@ block0(v0: i64, v1: i64):
  return v2
 }

-; check:  udiv x2, x0, x1
-; nextln:  cbnz x1, 8 ; udf
+; check:   cbnz x1, 8 ; udf
+; nextln:  udiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret

@@ -129,13 +122,13 @@ block0(v0: i32, v1: i32):
  return v2
 }

-; check:  sxtw x3, w0
-; nextln:  sxtw x2, w1
-; nextln:  sdiv x0, x3, x2
-; nextln:  cbnz x2, 8 ; udf
-; nextln:  adds wzr, w2, #1
-; nextln:  ccmp w3, #1, #nzcv, eq
+; check:  sxtw x0, w0
+; nextln:  sxtw x1, w1
+; nextln:  cbnz x1, 8 ; udf
+; nextln:  adds wzr, w1, #1
+; nextln:  ccmp w0, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
+; nextln:  sdiv x0, x0, x1
 ; nextln:  ret

 function %f13(i32) -> i32 {
@@ -145,15 +138,9 @@ block0(v0: i32):
  return v2
 }

-; check: sxtw x0, w0
-; nextln: movz x1, #2
-; nextln: sxtw x2, w1
-; nextln: sdiv x1, x0, x2
-; nextln: cbnz x2, 8 ; udf
-; nextln: adds wzr, w2, #1
-; nextln: ccmp w0, #1, #nzcv, eq
-; nextln: b.vc 8 ; udf
-; nextln: mov x0, x1
+; check:  sxtw x0, w0
+; nextln: orr x1, xzr, #2
+; nextln: sdiv x0, x0, x1
 ; nextln: ret

 function %f14(i32, i32) -> i32 {
@@ -164,8 +151,8 @@ block0(v0: i32, v1: i32):

 ; check: mov w0, w0
 ; nextln: mov w1, w1
-; nextln: udiv x0, x0, x1
 ; nextln: cbnz x1, 8 ; udf
+; nextln: udiv x0, x0, x1
 ; nextln: ret


@@ -176,10 +163,9 @@ block0(v0: i32):
  return v2
 }

-; check:  mov w0, w0
-; nextln:  movz x1, #2
+; check:   mov w0, w0
+; nextln:  orr x1, xzr, #2
 ; nextln:  udiv x0, x0, x1
-; nextln:  cbnz x1, 8 ; udf
 ; nextln:  ret

 function %f16(i32, i32) -> i32 {
@@ -190,8 +176,8 @@ block0(v0: i32, v1: i32):

 ; check:  sxtw x0, w0
 ; nextln:  sxtw x1, w1
-; nextln:  sdiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
+; nextln:  sdiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret

@@ -203,8 +189,8 @@ block0(v0: i32, v1: i32):

 ; check:  mov w0, w0
 ; nextln:  mov w1, w1
-; nextln:  udiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
+; nextln:  udiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret

@@ -389,3 +375,40 @@ block0(v0: i32, v1: i32, v2: i32):
 ; check:  madd w0, w1, w2, w0
 ; nextln: ret

+function %srem_const (i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 2
+  v2 = srem.i64 v0, v1 ; divisor is a nonzero constant, so the expected code below has no cbnz zero test
+  return v2
+}
+
+; check:   orr x1, xzr, #2
+; nextln:  sdiv x2, x0, x1
+; nextln:  msub x0, x2, x1, x0
+; nextln:  ret
+
+function %urem_const (i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 2
+  v2 = urem.i64 v0, v1 ; divisor is a nonzero constant, so the expected code below has no cbnz zero test
+  return v2
+}
+
+; check:   orr x1, xzr, #2
+; nextln:  udiv x2, x0, x1
+; nextln:  msub x0, x2, x1, x0
+; nextln:  ret
+
+function %sdiv_minus_one(i64) -> i64 {
+block0(v0: i64):
+  v1 = iconst.i64 -1
+  v2 = sdiv.i64 v0, v1 ; constant -1 divisor, so the expected code below keeps the adds/ccmp/b.vc overflow sequence but has no cbnz zero test
+  return v2
+}
+
+; check:  movn x1, #0
+; nextln:  adds xzr, x1, #1
+; nextln:  ccmp x0, #1, #nzcv, eq
+; nextln:  b.vc 8 ; udf
+; nextln:  sdiv x0, x0, x1
+; nextln:  ret