aarch64: Migrate {s,u}{div,rem} to ISLE (#3572)

* aarch64: Migrate {s,u}{div,rem} to ISLE This commit migrates four different instructions at once to ISLE: * `sdiv` * `udiv` * `srem` * `urem` These all share similar codegen and are built around the hardware `div` instructions. The main work here was modelling the manual traps: the `div` instructions don't trap on division by zero or on overflow, so manual checks are required to adhere to the semantics of the CLIF instructions themselves. While I was here I went ahead and implemented an optimization for these instructions when the right-hand-side is a constant with a known value. For `udiv`, `srem`, and `urem` if the right-hand-side is a nonzero constant then the checks for traps can be skipped entirely. For `sdiv` if the constant is not 0 and not -1 then additionally all checks can be elided. Finally if the right-hand-side of `sdiv` is -1 the zero-check is elided, but it still needs a check for `i64::MIN` on the left-hand-side and currently there's a TODO where `-1` is still checked too. * Rebasing and review conflicts
2021-12-13 17:27:11 -06:00
parent f1225dfd93
commit 20e090b114
12 changed files with 567 additions and 215 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1307,6 +1307,12 @@
 (decl imm12_from_u64 (Imm12) u64)
 (extern extractor imm12_from_u64 imm12_from_u64)
 ;; Convert a `u8` into a `UImm5`. Implemented by an external (Rust)
 ;; constructor of the same name.
 (decl u8_into_uimm5 (u8) UImm5)
 (extern constructor u8_into_uimm5 u8_into_uimm5)
 ;; Convert a `u8` into an `Imm12`. Implemented by an external (Rust)
 ;; constructor of the same name.
 (decl u8_into_imm12 (u8) Imm12)
 (extern constructor u8_into_imm12 u8_into_imm12)
 (decl imm12_from_negated_u64 (Imm12) u64)
 (extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
@@ -1339,6 +1345,15 @@
 (decl get_extended_op (ExtendedValue) ExtendOp)
 (extern constructor get_extended_op get_extended_op)
 ;; Build an `NZCV` flags value from four booleans. Implemented by an
 ;; external (Rust) constructor.
 (decl nzcv (bool bool bool bool) NZCV)
 (extern constructor nzcv nzcv)
 ;; Build a `CondBrKind` from a register ("zero" kind). Implemented by an
 ;; external (Rust) constructor.
 (decl cond_br_zero (Reg) CondBrKind)
 (extern constructor cond_br_zero cond_br_zero)
 ;; Build a `CondBrKind` from a condition code. Implemented by an external
 ;; (Rust) constructor.
 (decl cond_br_cond (Cond) CondBrKind)
 (extern constructor cond_br_cond cond_br_cond)
 ;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Emit an instruction.
@@ -1352,6 +1367,9 @@
 (decl zero_reg () Reg)
 (extern constructor zero_reg zero_reg)
 ;; The zero register as a `WritableReg`, for use as an instruction
 ;; destination. Implemented by an external (Rust) constructor.
 (decl writable_zero_reg () WritableReg)
 (extern constructor writable_zero_reg writable_zero_reg)
 ;; Helper for emitting `MInst.MovZ` instructions.
 (decl movz (MoveWideConst OperandSize) Reg)
 (rule (movz imm size)
@@ -1543,3 +1561,41 @@
 ;; 64-bit passthrough.
 (rule (put_in_reg_zext64 val @ (value_type $I64)) (put_in_reg val))
 ;; Misc instruction helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Emit a `TrapIf` on the `cond_br_zero` condition for `reg`, using the
 ;; division-by-zero trap code, and then return `reg` unchanged so the call
 ;; can be used inline wherever the divisor register is needed.
 (decl trap_if_zero_divisor (Reg) Reg)
 (rule (trap_if_zero_divisor reg)
       (let ((_ Unit (emit (MInst.TrapIf (cond_br_zero reg) (trap_code_division_by_zero)))))
         reg))
 ;; Select the operand size for a type: `Size32` for any type matched by
 ;; `fits_in_32`, `Size64` for `$I64`.
 (decl size_from_ty (Type) OperandSize)
 (rule (size_from_ty (fits_in_32 _ty)) (OperandSize.Size32))
 (rule (size_from_ty $I64) (OperandSize.Size64))
 ;; Check for signed overflow. The only case is min_value / -1.
 ;; The following checks must be done in 32-bit or 64-bit, depending
 ;; on the input type.
 ;;
 ;; Arguments are the type, the dividend `x` and the divisor `y`. Three
 ;; instructions are emitted, in this order: a flag-setting add, a conditional
 ;; compare, and a `TrapIf` on `Vs` with the integer-overflow trap code. The
 ;; dividend `x` is returned unchanged.
 (decl trap_if_div_overflow (Type Reg Reg) Reg)
 (rule (trap_if_div_overflow ty x y)
       (let (
           ;; Check RHS is -1. This is `y + 1` with the result written to the
           ;; zero register, so only the flags produced by the add are kept.
           (_1 Unit (emit (MInst.AluRRImm12 (adds_op ty) (writable_zero_reg) y (u8_into_imm12 1))))
           ;; Check LHS is min_value, by subtracting 1 and branching if
           ;; there is overflow. The compare is conditional on `Eq` from the
           ;; add above; the alternative NZCV value passed here has all four
           ;; flags clear.
           (_2 Unit (emit (MInst.CCmpImm (size_from_ty ty)
                                        x
                                        (u8_into_uimm5 1)
                                        (nzcv $false $false $false $false)
                                        (Cond.Eq))))
           ;; Trap with the integer-overflow code if the overflow flag is set.
           (_3 Unit (emit (MInst.TrapIf (cond_br_cond (Cond.Vs))
                                      (trap_code_integer_overflow))))
         )
         x))
 ;; Helper to use either a 32 or 64-bit adds depending on the input type:
 ;; types matched by `fits_in_32` select `AddS32`, `$I64` selects `AddS64`.
 (decl adds_op (Type) ALUOp)
 (rule (adds_op (fits_in_32 _ty)) (ALUOp.AddS32))
 (rule (adds_op $I64) (ALUOp.AddS64))
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -398,3 +398,107 @@
        )
        (value_reg result)))
 ;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; TODO: Add UDiv32 to implement 32-bit directly, rather
 ;; than extending the input.
 ;;
 ;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of
 ;; CLIF's `udiv` the check for zero needs to be manually performed.
 ;;
 ;; For every type accepted by `fits_in_64`, the dividend is zero-extended to
 ;; 64 bits, the divisor goes through `put_nonzero_in_reg_zext64`, and a single
 ;; 64-bit `UDiv64` produces the result.
 (rule (lower (has_type (fits_in_64 ty) (udiv x y)))
       (value_reg (alu_rrr (ALUOp.UDiv64)
                           (put_in_reg_zext64 x)
                           (put_nonzero_in_reg_zext64 y))))
 ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
 ;;
 ;; General case: zero-extend the value to 64 bits and emit the
 ;; division-by-zero trap check on the resulting register.
 (decl put_nonzero_in_reg_zext64 (Value) Reg)
 (rule (put_nonzero_in_reg_zext64 val)
       (trap_if_zero_divisor (put_in_reg_zext64 val)))
 ;; Special case where if a `Value` is known to be nonzero we can trivially
 ;; move it into a register.
 ;;
 ;; NOTE(review): `n` is the immediate as extracted by
 ;; `nonzero_u64_from_imm64` and is passed to `imm` as-is; unlike the general
 ;; case no explicit zero-extension is performed here. Confirm that constants
 ;; of types narrower than 64 bits are already zero-extended in `n`.
 (rule (put_nonzero_in_reg_zext64 (and (value_type ty)
                                      (def_inst (iconst (nonzero_u64_from_imm64 n)))))
       (imm ty n))
 ;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; TODO: Add SDiv32 to implement 32-bit directly, rather
 ;; than extending the input.
 ;;
 ;; The sequence of checks here should look like:
 ;;
 ;;   cbnz rm, #8
 ;;   udf ; divide by zero
 ;;   cmn rm, 1
 ;;   ccmp rn, 1, #nzcv, eq
 ;;   b.vc #8
 ;;   udf ; signed overflow
 ;;
 ;; Note: the div instruction does not trap on divide by zero or overflow, so
 ;; checks need to be manually inserted.
 ;;
 ;; TODO: if `y` is -1 then a check that `x` is not INT_MIN is all that's
 ;; necessary, but right now `y` is checked to not be -1 as well.
 (rule (lower (has_type (fits_in_64 ty) (sdiv x y)))
       (let (
           ;; Both operands are sign-extended to 64 bits; the divisor helper
           ;; is also responsible for the zero check.
           (x64 Reg (put_in_reg_sext64 x))
           (y64 Reg (put_nonzero_in_reg_sext64 y))
           ;; Emits the overflow check and hands back the dividend register.
           (valid_x64 Reg (trap_if_div_overflow ty x64 y64))
           (result Reg (alu_rrr (ALUOp.SDiv64) valid_x64 y64))
         )
         (value_reg result)))
 ;; Helper for extracting an immediate that's not 0 and not -1 from an imm64.
 ;; The extractor is implemented externally (Rust) and fails to match for
 ;; those two values.
 (decl safe_divisor_from_imm64 (u64) Imm64)
 (extern extractor safe_divisor_from_imm64 safe_divisor_from_imm64)
 ;; Special case for `sdiv` where no checks are needed due to division by a
 ;; constant meaning the checks are always passed.
 ;;
 ;; NOTE(review): the extracted `y` is passed to `imm` unchanged while `x` is
 ;; sign-extended to 64 bits. Confirm that constants of types narrower than
 ;; 64 bits are already sign-extended in the `Imm64`, both for the 0 / -1
 ;; test and for the 64-bit `SDiv64` below.
 (rule (lower (has_type (fits_in_64 ty) (sdiv x (def_inst (iconst (safe_divisor_from_imm64 y))))))
       (value_reg (alu_rrr (ALUOp.SDiv64)
                           (put_in_reg_sext64 x)
                           (imm ty y))))
 ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
 ;;
 ;; General case: sign-extend the value to 64 bits and emit the
 ;; division-by-zero trap check on the resulting register.
 (decl put_nonzero_in_reg_sext64 (Value) Reg)
 (rule (put_nonzero_in_reg_sext64 val)
       (trap_if_zero_divisor (put_in_reg_sext64 val)))
 ;; Note that this has a special case where if the `Value` is a constant that's
 ;; not zero we can skip the zero check.
 ;;
 ;; NOTE(review): `n` is passed to `imm` as-is; unlike the general case no
 ;; explicit sign-extension is performed here. Confirm that constants of
 ;; types narrower than 64 bits are already sign-extended in `n`.
 (rule (put_nonzero_in_reg_sext64 (and (value_type ty)
                                      (def_inst (iconst (nonzero_u64_from_imm64 n)))))
       (imm ty n))
 ;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Remainder (x % y) is implemented as:
 ;;
 ;;   tmp = x / y
 ;;   result = x - (tmp*y)
 ;;
 ;; use 'result' for tmp and you have:
 ;;
 ;;   cbnz y, #8         ; branch over trap
 ;;   udf                ; divide by zero
 ;;   div rd, x, y       ; rd = x / y
 ;;   msub rd, rd, y, x  ; rd = x - rd * y
 ;;
 ;; Unsigned variant: operands are zero-extended to 64 bits and the divisor
 ;; goes through the nonzero helper before the divide.
 (rule (lower (has_type (fits_in_64 ty) (urem x y)))
       (let (
           (x64 Reg (put_in_reg_zext64 x))
           (y64 Reg (put_nonzero_in_reg_zext64 y))
           ;; Quotient, then remainder via multiply-subtract.
           (div Reg (alu_rrr (ALUOp.UDiv64) x64 y64))
           (result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
         )
         (value_reg result)))
 ;; Signed remainder: operands are sign-extended to 64 bits, the divisor goes
 ;; through the nonzero helper, and the result is computed as a signed divide
 ;; followed by a multiply-subtract. No overflow check is emitted by this rule.
 (rule (lower (has_type (fits_in_64 ty) (srem x y)))
       (let (
           (x64 Reg (put_in_reg_sext64 x))
           (y64 Reg (put_nonzero_in_reg_sext64 y))
           (div Reg (alu_rrr (ALUOp.SDiv64) x64 y64))
           (result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
         )
         (value_reg result)))
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -5,10 +5,10 @@ pub mod generated_code;
 // Types that the generated ISLE code uses via `use super::*`.
 use super::{
-    zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, AtomicRmwOp, BranchTarget, CallIndInfo,
+    writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, AtomicRmwOp, BranchTarget,
-    CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, Imm12, ImmLogic, ImmShift, Inst as MInst,
+    CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, Imm12, ImmLogic, ImmShift,
-    JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg,
+    Inst as MInst, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize,
-    ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
+    PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
 };
 use crate::isa::aarch64::settings as aarch64_settings;
 use crate::machinst::isle::*;
@@ -244,4 +244,35 @@ where
    /// Record `inst` by pushing a clone of it onto `self.emitted_insts`.
    fn emit(&mut self, inst: &MInst) -> Unit {
        self.emitted_insts.push(inst.clone());
    }
    /// Wrap `reg` in the `CondBrKind::Zero` variant.
    fn cond_br_zero(&mut self, reg: Reg) -> CondBrKind {
        CondBrKind::Zero(reg)
    }
    /// Wrap a copy of `cond` in the `CondBrKind::Cond` variant.
    fn cond_br_cond(&mut self, cond: &Cond) -> CondBrKind {
        CondBrKind::Cond(*cond)
    }
    /// Build an `NZCV` value by forwarding the four flags, in order, to
    /// `NZCV::new`.
    fn nzcv(&mut self, n: bool, z: bool, c: bool, v: bool) -> NZCV {
        NZCV::new(n, z, c, v)
    }
    /// Convert `x` into a `UImm5`.
    ///
    /// # Panics
    ///
    /// Panics if `UImm5::maybe_from_u8` returns `None` for `x` (presumably
    /// when `x` does not fit in 5 bits -- confirm against `UImm5`).
    fn u8_into_uimm5(&mut self, x: u8) -> UImm5 {
        UImm5::maybe_from_u8(x).unwrap()
    }
    /// Convert `x` into an `Imm12` by widening it to `u64` first.
    ///
    /// # Panics
    ///
    /// Panics if `Imm12::maybe_from_u64` returns `None` for the widened value.
    fn u8_into_imm12(&mut self, x: u8) -> Imm12 {
        Imm12::maybe_from_u64(x.into()).unwrap()
    }
    /// Forward to the free function `writable_zero_reg` imported from the
    /// parent module.
    fn writable_zero_reg(&mut self) -> WritableReg {
        writable_zero_reg()
    }
    /// Extract a divisor for which signed division needs no trap checks.
    ///
    /// Returns `None` when the raw immediate bits are `0` or `-1`; otherwise
    /// returns the bits reinterpreted as a `u64`.
    ///
    /// NOTE(review): the test is on the raw 64-bit immediate; confirm how
    /// constants of narrower types are represented in `Imm64`.
    fn safe_divisor_from_imm64(&mut self, val: Imm64) -> Option<u64> {
        let bits = val.bits();
        let needs_check = bits == 0 || bits == -1;
        if needs_check {
            None
        } else {
            Some(bits as u64)
        }
    }
 }
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
-src/prelude.isle 9bd1fcb6a3604a24cf2e05e6b7eb04dcb3b9dc8fa9a2f1c8f29c25b6e3bf7f679b3b1b72dff87501497b72bc30fc92fd755b898db7e03f380235fae931b6a74b
+src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
-src/isa/aarch64/inst.isle 6e042ec14166fceae4b7133f681fdf604e20a2997e1d60f797e40acd683ccb34e33376189f6b7ed2f5eb441dc61d592cad2592256dfea51296330752181b9403
+src/isa/aarch64/inst.isle cec03d88680e8da01424eecc05ef73a48e4055d29fe841fceaa3e6ea4e7cb9abb887401bb5acb2e058c9fc993188640990b699e88272d62e243781b231cdfb0d
-src/isa/aarch64/lower.isle 64a725771537f69c445f44c728e04bffd8a715d6a4d87a5a2bf2e89714ee290b7497c5ca8b335bdddd775f6734be03318ff9aa67e2e4068949ebae06b0902b3f
+src/isa/aarch64/lower.isle e1ae53adc953ad395feeecd8edc8bcfd288491a4e4a71510e5f06e221f767518c6e060ff0d795c7c2510b7d898cc8b9bc0313906412e0176605c33427926f828
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
@@ -42,6 +42,7 @@ pub trait Context {
    fn writable_reg_to_reg(&mut self, arg0: WritableReg) -> Reg;
    fn u8_from_uimm8(&mut self, arg0: Uimm8) -> u8;
    fn u64_from_imm64(&mut self, arg0: Imm64) -> u64;
    fn nonzero_u64_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
    fn u64_from_ieee32(&mut self, arg0: Ieee32) -> u64;
    fn u64_from_ieee64(&mut self, arg0: Ieee64) -> u64;
    fn inst_results(&mut self, arg0: Inst) -> ValueSlice;
@@ -50,29 +51,38 @@ pub trait Context {
    fn value_type(&mut self, arg0: Value) -> Type;
    fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>;
    fn def_inst(&mut self, arg0: Value) -> Option<Inst>;
    fn trap_code_division_by_zero(&mut self) -> TrapCode;
    fn trap_code_integer_overflow(&mut self) -> TrapCode;
    fn move_wide_const_from_u64(&mut self, arg0: u64) -> Option<MoveWideConst>;
    fn move_wide_const_from_negated_u64(&mut self, arg0: u64) -> Option<MoveWideConst>;
    fn imm_logic_from_u64(&mut self, arg0: u64) -> Option<ImmLogic>;
    fn imm_shift_from_u8(&mut self, arg0: u8) -> ImmShift;
    fn imm12_from_u64(&mut self, arg0: u64) -> Option<Imm12>;
    fn u8_into_uimm5(&mut self, arg0: u8) -> UImm5;
    fn u8_into_imm12(&mut self, arg0: u8) -> Imm12;
    fn imm12_from_negated_u64(&mut self, arg0: u64) -> Option<Imm12>;
    fn lshl_from_imm64(&mut self, arg0: Imm64, arg1: Type) -> Option<ShiftOpAndAmt>;
    fn integral_ty(&mut self, arg0: Type) -> Option<Type>;
    fn extended_value_from_value(&mut self, arg0: Value) -> Option<ExtendedValue>;
    fn put_extended_in_reg(&mut self, arg0: &ExtendedValue) -> Reg;
    fn get_extended_op(&mut self, arg0: &ExtendedValue) -> ExtendOp;
    fn nzcv(&mut self, arg0: bool, arg1: bool, arg2: bool, arg3: bool) -> NZCV;
    fn cond_br_zero(&mut self, arg0: Reg) -> CondBrKind;
    fn cond_br_cond(&mut self, arg0: &Cond) -> CondBrKind;
    fn emit(&mut self, arg0: &MInst) -> Unit;
    fn zero_reg(&mut self) -> Reg;
    fn writable_zero_reg(&mut self) -> WritableReg;
    fn load_constant64_full(&mut self, arg0: u64) -> Reg;
    fn safe_divisor_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
 }
-/// Internal type ProducesFlags: defined at src/prelude.isle line 242.
+/// Internal type ProducesFlags: defined at src/prelude.isle line 246.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
    ProducesFlags { inst: MInst, result: Reg },
 }
-/// Internal type ConsumesFlags: defined at src/prelude.isle line 245.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 249.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
    ConsumesFlags { inst: MInst, result: Reg },
@@ -986,7 +996,7 @@ pub fn constructor_with_flags<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 255.
+            // Rule at src/prelude.isle line 259.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1014,7 +1024,7 @@ pub fn constructor_with_flags_1<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 263.
+            // Rule at src/prelude.isle line 267.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            return Some(pattern3_1);
@@ -1048,7 +1058,7 @@ pub fn constructor_with_flags_2<C: Context>(
                result: pattern5_1,
            } = pattern4_0
            {
-                // Rule at src/prelude.isle line 273.
+                // Rule at src/prelude.isle line 277.
                let expr0_0 = C::emit(ctx, &pattern1_0);
                let expr1_0 = C::emit(ctx, &pattern3_0);
                let expr2_0 = C::emit(ctx, &pattern5_0);
@@ -1104,7 +1114,7 @@ pub fn constructor_movz<C: Context>(
 ) -> Option<Reg> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1357.
+    // Rule at src/isa/aarch64/inst.isle line 1375.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::MovZ {
@@ -1125,7 +1135,7 @@ pub fn constructor_movn<C: Context>(
 ) -> Option<Reg> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1364.
+    // Rule at src/isa/aarch64/inst.isle line 1382.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::MovN {
@@ -1148,7 +1158,7 @@ pub fn constructor_alu_rr_imm_logic<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1371.
+    // Rule at src/isa/aarch64/inst.isle line 1389.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRImmLogic {
@@ -1172,7 +1182,7 @@ pub fn constructor_alu_rr_imm_shift<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1378.
+    // Rule at src/isa/aarch64/inst.isle line 1396.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRImmShift {
@@ -1196,7 +1206,7 @@ pub fn constructor_alu_rrr<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1385.
+    // Rule at src/isa/aarch64/inst.isle line 1403.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRR {
@@ -1222,7 +1232,7 @@ pub fn constructor_vec_rrr<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1392.
+    // Rule at src/isa/aarch64/inst.isle line 1410.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecRRR {
@@ -1247,7 +1257,7 @@ pub fn constructor_alu_rr_imm12<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1399.
+    // Rule at src/isa/aarch64/inst.isle line 1417.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRImm12 {
@@ -1273,7 +1283,7 @@ pub fn constructor_alu_rrr_shift<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1406.
+    // Rule at src/isa/aarch64/inst.isle line 1424.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRRShift {
@@ -1300,7 +1310,7 @@ pub fn constructor_alu_rrr_extend<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1413.
+    // Rule at src/isa/aarch64/inst.isle line 1431.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRRExtend {
@@ -1325,7 +1335,7 @@ pub fn constructor_alu_rr_extend_reg<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1421.
+    // Rule at src/isa/aarch64/inst.isle line 1439.
    let expr0_0 = C::put_extended_in_reg(ctx, pattern2_0);
    let expr1_0 = C::get_extended_op(ctx, pattern2_0);
    let expr2_0 = constructor_alu_rrr_extend(ctx, pattern0_0, pattern1_0, expr0_0, &expr1_0)?;
@@ -1344,7 +1354,7 @@ pub fn constructor_alu_rrrr<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1428.
+    // Rule at src/isa/aarch64/inst.isle line 1446.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::AluRRRR {
@@ -1367,7 +1377,7 @@ pub fn constructor_add64_with_flags<C: Context>(
 ) -> Option<ProducesFlags> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1435.
+    // Rule at src/isa/aarch64/inst.isle line 1453.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = ALUOp::AddS64;
@@ -1389,7 +1399,7 @@ pub fn constructor_add64_with_flags<C: Context>(
 pub fn constructor_adc64<C: Context>(ctx: &mut C, arg0: Reg, arg1: Reg) -> Option<ConsumesFlags> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1442.
+    // Rule at src/isa/aarch64/inst.isle line 1460.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = ALUOp::Adc64;
@@ -1415,7 +1425,7 @@ pub fn constructor_sub64_with_flags<C: Context>(
 ) -> Option<ProducesFlags> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1449.
+    // Rule at src/isa/aarch64/inst.isle line 1467.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = ALUOp::SubS64;
@@ -1437,7 +1447,7 @@ pub fn constructor_sub64_with_flags<C: Context>(
 pub fn constructor_sbc64<C: Context>(ctx: &mut C, arg0: Reg, arg1: Reg) -> Option<ConsumesFlags> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
-    // Rule at src/isa/aarch64/inst.isle line 1456.
+    // Rule at src/isa/aarch64/inst.isle line 1474.
    let expr0_0: Type = I64;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = ALUOp::Sbc64;
@@ -1465,7 +1475,7 @@ pub fn constructor_vec_misc<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1463.
+    // Rule at src/isa/aarch64/inst.isle line 1481.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecMisc {
@@ -1491,7 +1501,7 @@ pub fn constructor_vec_rrr_long<C: Context>(
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
-    // Rule at src/isa/aarch64/inst.isle line 1470.
+    // Rule at src/isa/aarch64/inst.isle line 1488.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecRRRLong {
@@ -1520,7 +1530,7 @@ pub fn constructor_vec_rrrr_long<C: Context>(
    let pattern2_0 = arg2;
    let pattern3_0 = arg3;
    let pattern4_0 = arg4;
-    // Rule at src/isa/aarch64/inst.isle line 1480.
+    // Rule at src/isa/aarch64/inst.isle line 1498.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::FpuMove128 {
@@ -1550,7 +1560,7 @@ pub fn constructor_vec_rr_narrow<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1488.
+    // Rule at src/isa/aarch64/inst.isle line 1506.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecRRNarrow {
@@ -1574,7 +1584,7 @@ pub fn constructor_vec_rr_long<C: Context>(
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1495.
+    // Rule at src/isa/aarch64/inst.isle line 1513.
    let expr0_0: Type = I8X16;
    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
    let expr2_0 = MInst::VecRRLong {
@@ -1594,25 +1604,25 @@ pub fn constructor_imm<C: Context>(ctx: &mut C, arg0: Type, arg1: u64) -> Option
    if let Some(pattern1_0) = C::integral_ty(ctx, pattern0_0) {
        let pattern2_0 = arg1;
        if let Some(pattern3_0) = C::imm_logic_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1513.
+            // Rule at src/isa/aarch64/inst.isle line 1531.
            let expr0_0 = ALUOp::Orr64;
            let expr1_0 = C::zero_reg(ctx);
            let expr2_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, expr1_0, pattern3_0)?;
            return Some(expr2_0);
        }
        if let Some(pattern3_0) = C::move_wide_const_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1505.
+            // Rule at src/isa/aarch64/inst.isle line 1523.
            let expr0_0 = OperandSize::Size64;
            let expr1_0 = constructor_movz(ctx, pattern3_0, &expr0_0)?;
            return Some(expr1_0);
        }
        if let Some(pattern3_0) = C::move_wide_const_from_negated_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1509.
+            // Rule at src/isa/aarch64/inst.isle line 1527.
            let expr0_0 = OperandSize::Size64;
            let expr1_0 = constructor_movn(ctx, pattern3_0, &expr0_0)?;
            return Some(expr1_0);
        }
-        // Rule at src/isa/aarch64/inst.isle line 1520.
+        // Rule at src/isa/aarch64/inst.isle line 1538.
        let expr0_0 = C::load_constant64_full(ctx, pattern2_0);
        return Some(expr0_0);
    }
@@ -1624,12 +1634,12 @@ pub fn constructor_put_in_reg_sext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
    let pattern0_0 = arg0;
    let pattern1_0 = C::value_type(ctx, pattern0_0);
    if pattern1_0 == I64 {
-        // Rule at src/isa/aarch64/inst.isle line 1534.
+        // Rule at src/isa/aarch64/inst.isle line 1552.
        let expr0_0 = C::put_in_reg(ctx, pattern0_0);
        return Some(expr0_0);
    }
    if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) {
-        // Rule at src/isa/aarch64/inst.isle line 1527.
+        // Rule at src/isa/aarch64/inst.isle line 1545.
        let expr0_0: Type = I32;
        let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
        let expr2_0 = C::put_in_reg(ctx, pattern0_0);
@@ -1655,12 +1665,12 @@ pub fn constructor_put_in_reg_zext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
    let pattern0_0 = arg0;
    let pattern1_0 = C::value_type(ctx, pattern0_0);
    if pattern1_0 == I64 {
-        // Rule at src/isa/aarch64/inst.isle line 1545.
+        // Rule at src/isa/aarch64/inst.isle line 1563.
        let expr0_0 = C::put_in_reg(ctx, pattern0_0);
        return Some(expr0_0);
    }
    if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) {
-        // Rule at src/isa/aarch64/inst.isle line 1538.
+        // Rule at src/isa/aarch64/inst.isle line 1556.
        let expr0_0: Type = I32;
        let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
        let expr2_0 = C::put_in_reg(ctx, pattern0_0);
@@ -1681,6 +1691,102 @@ pub fn constructor_put_in_reg_zext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
    return None;
 }
 // Generated as internal constructor for term trap_if_zero_divisor.
 // Emits a `TrapIf` built from `cond_br_zero(arg0)` and the division-by-zero
 // trap code, then returns `arg0` unchanged.
 pub fn constructor_trap_if_zero_divisor<C: Context>(ctx: &mut C, arg0: Reg) -> Option<Reg> {
    let pattern0_0 = arg0;
    // Rule at src/isa/aarch64/inst.isle line 1568.
    let expr0_0 = C::cond_br_zero(ctx, pattern0_0);
    let expr1_0 = C::trap_code_division_by_zero(ctx);
    let expr2_0 = MInst::TrapIf {
        kind: expr0_0,
        trap_code: expr1_0,
    };
    let expr3_0 = C::emit(ctx, &expr2_0);
    return Some(pattern0_0);
 }
 // Generated as internal constructor for term size_from_ty.
 // Returns `Size64` for `I64`, `Size32` for types accepted by `fits_in_32`,
 // and `None` when neither rule matches.
 pub fn constructor_size_from_ty<C: Context>(ctx: &mut C, arg0: Type) -> Option<OperandSize> {
    let pattern0_0 = arg0;
    if pattern0_0 == I64 {
        // Rule at src/isa/aarch64/inst.isle line 1574.
        let expr0_0 = OperandSize::Size64;
        return Some(expr0_0);
    }
    if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) {
        // Rule at src/isa/aarch64/inst.isle line 1573.
        let expr0_0 = OperandSize::Size32;
        return Some(expr0_0);
    }
    return None;
 }
 // Generated as internal constructor for term trap_if_div_overflow.
 // Emits, in order: an `AluRRImm12` (flag-setting add of 1 to `arg2`, written
 // to the zero register), a `CCmpImm` of `arg1` against 1 conditional on `Eq`,
 // and a `TrapIf` on `Vs` with the integer-overflow trap code. Returns `arg1`.
 // Returns `None` early if `adds_op` or `size_from_ty` has no rule for `arg0`.
 pub fn constructor_trap_if_div_overflow<C: Context>(
    ctx: &mut C,
    arg0: Type,
    arg1: Reg,
    arg2: Reg,
 ) -> Option<Reg> {
    let pattern0_0 = arg0;
    let pattern1_0 = arg1;
    let pattern2_0 = arg2;
    // Rule at src/isa/aarch64/inst.isle line 1580.
    let expr0_0 = constructor_adds_op(ctx, pattern0_0)?;
    let expr1_0 = C::writable_zero_reg(ctx);
    let expr2_0: u8 = 1;
    let expr3_0 = C::u8_into_imm12(ctx, expr2_0);
    let expr4_0 = MInst::AluRRImm12 {
        alu_op: expr0_0,
        rd: expr1_0,
        rn: pattern2_0,
        imm12: expr3_0,
    };
    let expr5_0 = C::emit(ctx, &expr4_0);
    let expr6_0 = constructor_size_from_ty(ctx, pattern0_0)?;
    let expr7_0: u8 = 1;
    let expr8_0 = C::u8_into_uimm5(ctx, expr7_0);
    let expr9_0: bool = false;
    let expr10_0: bool = false;
    let expr11_0: bool = false;
    let expr12_0: bool = false;
    let expr13_0 = C::nzcv(ctx, expr9_0, expr10_0, expr11_0, expr12_0);
    let expr14_0 = Cond::Eq;
    let expr15_0 = MInst::CCmpImm {
        size: expr6_0,
        rn: pattern1_0,
        imm: expr8_0,
        nzcv: expr13_0,
        cond: expr14_0,
    };
    let expr16_0 = C::emit(ctx, &expr15_0);
    let expr17_0 = Cond::Vs;
    let expr18_0 = C::cond_br_cond(ctx, &expr17_0);
    let expr19_0 = C::trap_code_integer_overflow(ctx);
    let expr20_0 = MInst::TrapIf {
        kind: expr18_0,
        trap_code: expr19_0,
    };
    let expr21_0 = C::emit(ctx, &expr20_0);
    return Some(pattern1_0);
 }
 // Generated as internal constructor for term adds_op.
 // Returns `AddS64` for `I64`, `AddS32` for types accepted by `fits_in_32`,
 // and `None` when neither rule matches.
 pub fn constructor_adds_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<ALUOp> {
    let pattern0_0 = arg0;
    if pattern0_0 == I64 {
        // Rule at src/isa/aarch64/inst.isle line 1600.
        let expr0_0 = ALUOp::AddS64;
        return Some(expr0_0);
    }
    if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) {
        // Rule at src/isa/aarch64/inst.isle line 1599.
        let expr0_0 = ALUOp::AddS32;
        return Some(expr0_0);
    }
    return None;
 }
 // Generated as internal constructor for term lower.
 pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueRegs> {
    let pattern0_0 = arg0;
@@ -2843,6 +2949,85 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
                            let expr5_0 = C::value_reg(ctx, expr4_0);
                            return Some(expr5_0);
                        }
                        &Opcode::Udiv => {
                            let (pattern7_0, pattern7_1) =
                                C::unpack_value_array_2(ctx, &pattern5_1);
                            // Rule at src/isa/aarch64/lower.isle line 408.
                            let expr0_0 = ALUOp::UDiv64;
                            let expr1_0 = constructor_put_in_reg_zext64(ctx, pattern7_0)?;
                            let expr2_0 = constructor_put_nonzero_in_reg_zext64(ctx, pattern7_1)?;
                            let expr3_0 = constructor_alu_rrr(ctx, &expr0_0, expr1_0, expr2_0)?;
                            let expr4_0 = C::value_reg(ctx, expr3_0);
                            return Some(expr4_0);
                        }
                        &Opcode::Sdiv => {
                            let (pattern7_0, pattern7_1) =
                                C::unpack_value_array_2(ctx, &pattern5_1);
                            if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) {
                                let pattern9_0 = C::inst_data(ctx, pattern8_0);
                                if let &InstructionData::UnaryImm {
                                    opcode: ref pattern10_0,
                                    imm: pattern10_1,
                                } = &pattern9_0
                                {
                                    if let &Opcode::Iconst = &pattern10_0 {
                                        if let Some(pattern12_0) =
                                            C::safe_divisor_from_imm64(ctx, pattern10_1)
                                        {
                                            // Rule at src/isa/aarch64/lower.isle line 458.
                                            let expr0_0 = ALUOp::SDiv64;
                                            let expr1_0 =
                                                constructor_put_in_reg_sext64(ctx, pattern7_0)?;
                                            let expr2_0 =
                                                constructor_imm(ctx, pattern3_0, pattern12_0)?;
                                            let expr3_0 = constructor_alu_rrr(
                                                ctx, &expr0_0, expr1_0, expr2_0,
                                            )?;
                                            let expr4_0 = C::value_reg(ctx, expr3_0);
                                            return Some(expr4_0);
                                        }
                                    }
                                }
                            }
                            // Rule at src/isa/aarch64/lower.isle line 443.
                            let expr0_0 = constructor_put_in_reg_sext64(ctx, pattern7_0)?;
                            let expr1_0 = constructor_put_nonzero_in_reg_sext64(ctx, pattern7_1)?;
                            let expr2_0 = constructor_trap_if_div_overflow(
                                ctx, pattern3_0, expr0_0, expr1_0,
                            )?;
                            let expr3_0 = ALUOp::SDiv64;
                            let expr4_0 = constructor_alu_rrr(ctx, &expr3_0, expr2_0, expr1_0)?;
                            let expr5_0 = C::value_reg(ctx, expr4_0);
                            return Some(expr5_0);
                        }
                        &Opcode::Urem => {
                            let (pattern7_0, pattern7_1) =
                                C::unpack_value_array_2(ctx, &pattern5_1);
                            // Rule at src/isa/aarch64/lower.isle line 488.
                            let expr0_0 = constructor_put_in_reg_zext64(ctx, pattern7_0)?;
                            let expr1_0 = constructor_put_nonzero_in_reg_zext64(ctx, pattern7_1)?;
                            let expr2_0 = ALUOp::UDiv64;
                            let expr3_0 = constructor_alu_rrr(ctx, &expr2_0, expr0_0, expr1_0)?;
                            let expr4_0 = ALUOp3::MSub64;
                            let expr5_0 =
                                constructor_alu_rrrr(ctx, &expr4_0, expr3_0, expr1_0, expr0_0)?;
                            let expr6_0 = C::value_reg(ctx, expr5_0);
                            return Some(expr6_0);
                        }
                        &Opcode::Srem => {
                            let (pattern7_0, pattern7_1) =
                                C::unpack_value_array_2(ctx, &pattern5_1);
                            // Rule at src/isa/aarch64/lower.isle line 497.
                            let expr0_0 = constructor_put_in_reg_sext64(ctx, pattern7_0)?;
                            let expr1_0 = constructor_put_nonzero_in_reg_sext64(ctx, pattern7_1)?;
                            let expr2_0 = ALUOp::SDiv64;
                            let expr3_0 = constructor_alu_rrr(ctx, &expr2_0, expr0_0, expr1_0)?;
                            let expr4_0 = ALUOp3::MSub64;
                            let expr5_0 =
                                constructor_alu_rrrr(ctx, &expr4_0, expr3_0, expr1_0, expr0_0)?;
                            let expr6_0 = C::value_reg(ctx, expr5_0);
                            return Some(expr6_0);
                        }
                        _ => {}
                    }
                }
@@ -3015,3 +3200,55 @@ pub fn constructor_madd_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<ALUOp3
    }
    return None;
 }
 // Generated as internal constructor for term put_nonzero_in_reg_zext64.
 //
 // Produces a register for `arg0`. If the value is defined by an `iconst` whose
 // immediate is accepted by `nonzero_u64_from_imm64`, the constant is built with
 // `imm` and `trap_if_zero_divisor` is not called. Otherwise the value goes
 // through `put_in_reg_zext64` and then `trap_if_zero_divisor`.
 pub fn constructor_put_nonzero_in_reg_zext64<C: Context>(ctx: &mut C, arg0: Value) -> Option<Reg> {
    let value = arg0;
    let ty = C::value_type(ctx, value);
    if let Some(def) = C::def_inst(ctx, value) {
        let data = C::inst_data(ctx, def);
        if let &InstructionData::UnaryImm {
            opcode: Opcode::Iconst,
            imm,
        } = &data
        {
            if let Some(nonzero) = C::nonzero_u64_from_imm64(ctx, imm) {
                // Rule at src/isa/aarch64/lower.isle line 420.
                return constructor_imm(ctx, ty, nonzero);
            }
        }
    }
    // Rule at src/isa/aarch64/lower.isle line 415.
    let extended = constructor_put_in_reg_zext64(ctx, value)?;
    constructor_trap_if_zero_divisor(ctx, extended)
 }
 // Generated as internal constructor for term put_nonzero_in_reg_sext64.
 //
 // Produces a register for `arg0`. If the value is defined by an `iconst` whose
 // immediate is accepted by `nonzero_u64_from_imm64`, the constant is built with
 // `imm` and `trap_if_zero_divisor` is not called. Otherwise the value goes
 // through `put_in_reg_sext64` and then `trap_if_zero_divisor`.
 //
 // NOTE(review): the constant path hands the raw `u64` to `imm` together with
 // the value's own type; whether a negative constant of a narrow type ends up
 // sign-extended to 64 bits is not visible here -- confirm against `imm`.
 pub fn constructor_put_nonzero_in_reg_sext64<C: Context>(ctx: &mut C, arg0: Value) -> Option<Reg> {
    let value = arg0;
    let ty = C::value_type(ctx, value);
    if let Some(def) = C::def_inst(ctx, value) {
        let data = C::inst_data(ctx, def);
        if let &InstructionData::UnaryImm {
            opcode: Opcode::Iconst,
            imm,
        } = &data
        {
            if let Some(nonzero) = C::nonzero_u64_from_imm64(ctx, imm) {
                // Rule at src/isa/aarch64/lower.isle line 470.
                return constructor_imm(ctx, ty, nonzero);
            }
        }
    }
    // Rule at src/isa/aarch64/lower.isle line 465.
    let extended = constructor_put_in_reg_sext64(ctx, value)?;
    constructor_trap_if_zero_divisor(ctx, extended)
 }
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -75,135 +75,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        Opcode::Umulhi | Opcode::Smulhi => implemented_in_isle(ctx),
-        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => {
+        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => implemented_in_isle(ctx),
            let ty = ty.unwrap();
            if ty.is_vector() || ty_bits(ty) > 64 {
                return Err(CodegenError::Unsupported(format!(
                    "{}: Unsupported type: {:?}",
                    op, ty
                )));
            }
            let is_signed = match op {
                Opcode::Udiv | Opcode::Urem => false,
                Opcode::Sdiv | Opcode::Srem => true,
                _ => unreachable!(),
            };
            let is_rem = match op {
                Opcode::Udiv | Opcode::Sdiv => false,
                Opcode::Urem | Opcode::Srem => true,
                _ => unreachable!(),
            };
            let narrow_mode = if is_signed {
                NarrowValueMode::SignExtend64
            } else {
                NarrowValueMode::ZeroExtend64
            };
            // TODO: Add SDiv32 to implement 32-bit directly, rather
            // than extending the input.
            let div_op = if is_signed {
                ALUOp::SDiv64
            } else {
                ALUOp::UDiv64
            };
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
            let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
            // The div instruction does not trap on divide by zero or signed overflow
            // so checks are inserted below.
            //
            //   div rd, rn, rm
            ctx.emit(Inst::AluRRR {
                alu_op: div_op,
                rd,
                rn,
                rm,
            });
            if is_rem {
                // Remainder (rn % rm) is implemented as:
                //
                //   tmp = rn / rm
                //   rd = rn - (tmp*rm)
                //
                // use 'rd' for tmp and you have:
                //
                //   div rd, rn, rm       ; rd = rn / rm
                //   cbnz rm, #8          ; branch over trap
                //   udf                  ; divide by zero
                //   msub rd, rd, rm, rn  ; rd = rn - rd * rm
                // Check for divide by 0.
                let trap_code = TrapCode::IntegerDivisionByZero;
                ctx.emit(Inst::TrapIf {
                    trap_code,
                    kind: CondBrKind::Zero(rm),
                });
                ctx.emit(Inst::AluRRRR {
                    alu_op: ALUOp3::MSub64,
                    rd,
                    rn: rd.to_reg(),
                    rm,
                    ra: rn,
                });
            } else {
                if div_op == ALUOp::SDiv64 {
                    //   cbnz rm, #8
                    //   udf ; divide by zero
                    //   cmn rm, 1
                    //   ccmp rn, 1, #nzcv, eq
                    //   b.vc #8
                    //   udf ; signed overflow
                    // Check for divide by 0.
                    let trap_code = TrapCode::IntegerDivisionByZero;
                    ctx.emit(Inst::TrapIf {
                        trap_code,
                        kind: CondBrKind::Zero(rm),
                    });
                    // Check for signed overflow. The only case is min_value / -1.
                    // The following checks must be done in 32-bit or 64-bit, depending
                    // on the input type. Even though the initial div instruction is
                    // always done in 64-bit currently.
                    let size = OperandSize::from_ty(ty);
                    // Check RHS is -1.
                    ctx.emit(Inst::AluRRImm12 {
                        alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
                        rd: writable_zero_reg(),
                        rn: rm,
                        imm12: Imm12::maybe_from_u64(1).unwrap(),
                    });
                    // Check LHS is min_value, by subtracting 1 and branching if
                    // there is overflow.
                    ctx.emit(Inst::CCmpImm {
                        size,
                        rn,
                        imm: UImm5::maybe_from_u8(1).unwrap(),
                        nzcv: NZCV::new(false, false, false, false),
                        cond: Cond::Eq,
                    });
                    let trap_code = TrapCode::IntegerOverflow;
                    ctx.emit(Inst::TrapIf {
                        trap_code,
                        kind: CondBrKind::Cond(Cond::Vs),
                    });
                } else {
                    //   cbnz rm, #8
                    //   udf ; divide by zero
                    // Check for divide by 0.
                    let trap_code = TrapCode::IntegerDivisionByZero;
                    ctx.emit(Inst::TrapIf {
                        trap_code,
                        kind: CondBrKind::Zero(rm),
                    });
                }
            }
        }
        Opcode::Uextend | Opcode::Sextend => {
            let output_ty = ty.unwrap();
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -12,7 +12,7 @@ use crate::isa::x64::inst::regs;
 use crate::isa::x64::settings as x64_settings;
 use crate::machinst::isle::*;
 use crate::{
-    ir::{immediates::*, types::*, Inst, InstructionData, Opcode, Value, ValueList},
+    ir::{immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueList},
    isa::x64::inst::{
        args::{
            Avx512Opcode, CmpOpcode, ExtMode, FcmpImm, Imm8Reg, RegMem, ShiftKind, SseOpcode, CC,
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
-src/prelude.isle 9bd1fcb6a3604a24cf2e05e6b7eb04dcb3b9dc8fa9a2f1c8f29c25b6e3bf7f679b3b1b72dff87501497b72bc30fc92fd755b898db7e03f380235fae931b6a74b
+src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
 src/isa/x64/inst.isle b151120df3c356ac697122a8557becd8857eb725851506e844edeb85d831d461322a96d280ad84f9a23518e1e4efb607aebc0e249004148675e4cc19e89f0655
 src/isa/x64/lower.isle c9b408df0a089fb4f207838973ac775b0f9b56c86f056867c28e6bae317873d3844f74f713f9acd6fed98d3d11a2f9d19d392fe5049169dad33b1fc703b9b766
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
@@ -42,6 +42,7 @@ pub trait Context {
    fn writable_reg_to_reg(&mut self, arg0: WritableReg) -> Reg;
    fn u8_from_uimm8(&mut self, arg0: Uimm8) -> u8;
    fn u64_from_imm64(&mut self, arg0: Imm64) -> u64;
    fn nonzero_u64_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
    fn u64_from_ieee32(&mut self, arg0: Ieee32) -> u64;
    fn u64_from_ieee64(&mut self, arg0: Ieee64) -> u64;
    fn inst_results(&mut self, arg0: Inst) -> ValueSlice;
@@ -50,6 +51,8 @@ pub trait Context {
    fn value_type(&mut self, arg0: Value) -> Type;
    fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>;
    fn def_inst(&mut self, arg0: Value) -> Option<Inst>;
    fn trap_code_division_by_zero(&mut self) -> TrapCode;
    fn trap_code_integer_overflow(&mut self) -> TrapCode;
    fn operand_size_of_type(&mut self, arg0: Type) -> OperandSize;
    fn put_in_reg_mem(&mut self, arg0: Value) -> RegMem;
    fn encode_fcmp_imm(&mut self, arg0: &FcmpImm) -> u8;
@@ -68,13 +71,13 @@ pub trait Context {
    fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8;
 }
-/// Internal type ProducesFlags: defined at src/prelude.isle line 242.
+/// Internal type ProducesFlags: defined at src/prelude.isle line 246.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
    ProducesFlags { inst: MInst, result: Reg },
 }
-/// Internal type ConsumesFlags: defined at src/prelude.isle line 245.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 249.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
    ConsumesFlags { inst: MInst, result: Reg },
@@ -124,7 +127,7 @@ pub fn constructor_with_flags<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 255.
+            // Rule at src/prelude.isle line 259.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -152,7 +155,7 @@ pub fn constructor_with_flags_1<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 263.
+            // Rule at src/prelude.isle line 267.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            return Some(pattern3_1);
@@ -186,7 +189,7 @@ pub fn constructor_with_flags_2<C: Context>(
                result: pattern5_1,
            } = pattern4_0
            {
-                // Rule at src/prelude.isle line 273.
+                // Rule at src/prelude.isle line 277.
                let expr0_0 = C::emit(ctx, &pattern1_0);
                let expr1_0 = C::emit(ctx, &pattern3_0);
                let expr2_0 = C::emit(ctx, &pattern5_0);
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -210,6 +210,21 @@ macro_rules! isle_prelude_methods {
                Some(())
            }
        }
        // Backs the ISLE `trap_code_division_by_zero` constructor: always
        // yields `TrapCode::IntegerDivisionByZero`.
        fn trap_code_division_by_zero(&mut self) -> TrapCode {
            TrapCode::IntegerDivisionByZero
        }
        // Backs the ISLE `trap_code_integer_overflow` constructor: always
        // yields `TrapCode::IntegerOverflow`.
        fn trap_code_integer_overflow(&mut self) -> TrapCode {
            TrapCode::IntegerOverflow
        }
        // Backs the ISLE `nonzero_u64_from_imm64` extractor: fails (`None`)
        // when the immediate's bits are zero, otherwise yields those bits
        // cast to `u64`.
        fn nonzero_u64_from_imm64(&mut self, val: Imm64) -> Option<u64> {
            let bits = val.bits();
            if bits == 0 {
                None
            } else {
                Some(bits as u64)
            }
        }
    };
 }
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -190,6 +190,10 @@
 (decl u64_from_imm64 (u64) Imm64)
 (extern extractor infallible u64_from_imm64 u64_from_imm64)
 ;; Extract a `u64` from an `Imm64` which is not zero.
 (decl nonzero_u64_from_imm64 (u64) Imm64)
 (extern extractor nonzero_u64_from_imm64 nonzero_u64_from_imm64)
 ;; Extract a `u64` from an `Ieee32`.
 (decl u64_from_ieee32 (u64) Ieee32)
 (extern extractor infallible u64_from_ieee32 u64_from_ieee32)
@@ -278,3 +282,10 @@
            (_z Unit (emit consumer_inst_2)))
        (value_regs consumer_result_1 consumer_result_2)))
 ;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Construct the `TrapCode` for an integer division by zero; implemented
 ;; externally by the Rust method of the same name.
 (decl trap_code_division_by_zero () TrapCode)
 (extern constructor trap_code_division_by_zero trap_code_division_by_zero)
 ;; Construct the `TrapCode` for an integer overflow; implemented externally
 ;; by the Rust method of the same name.
 (decl trap_code_integer_overflow () TrapCode)
 (extern constructor trap_code_integer_overflow trap_code_integer_overflow)
--- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
@@ -54,12 +54,11 @@ block0(v0: i64, v1: i64):
  return v2
 }
-; check:  sdiv x2, x0, x1
+; check:   cbnz x1, 8 ; udf
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  adds xzr, x1, #1
 ; nextln:  ccmp x0, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
-; nextln:  mov x0, x2
+; nextln:  sdiv x0, x0, x1
 ; nextln:  ret
 function %f7(i64) -> i64 {
@@ -69,13 +68,8 @@ block0(v0: i64):
  return v2
 }
-; check:  movz x2, #2
+; check:   orr x1, xzr, #2
-; nextln:  sdiv x1, x0, x2
+; nextln:  sdiv x0, x0, x1
 ; nextln:  cbnz x2, 8 ; udf
 ; nextln:  adds xzr, x2, #1
 ; nextln:  ccmp x0, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
 ; nextln:  mov x0, x1
 ; nextln:  ret
 function %f8(i64, i64) -> i64 {
@@ -84,8 +78,8 @@ block0(v0: i64, v1: i64):
  return v2
 }
-; check:  udiv x0, x0, x1
+; check:   cbnz x1, 8 ; udf
-; nextln:  cbnz x1, 8 ; udf
+; nextln:  udiv x0, x0, x1
 ; nextln:  ret
 function %f9(i64) -> i64 {
@@ -95,9 +89,8 @@ block0(v0: i64):
  return v2
 }
-; check:  movz x1, #2
+; check:   orr x1, xzr, #2
 ; nextln:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  ret
 function %f10(i64, i64) -> i64 {
@@ -106,8 +99,8 @@ block0(v0: i64, v1: i64):
  return v2
 }
-; check:  sdiv x2, x0, x1
+; check:   cbnz x1, 8 ; udf
-; nextln:  cbnz x1, 8 ; udf
+; nextln:  sdiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret
@@ -117,8 +110,8 @@ block0(v0: i64, v1: i64):
  return v2
 }
-; check:  udiv x2, x0, x1
+; check:   cbnz x1, 8 ; udf
-; nextln:  cbnz x1, 8 ; udf
+; nextln:  udiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret
@@ -129,13 +122,13 @@ block0(v0: i32, v1: i32):
  return v2
 }
-; check:  sxtw x3, w0
+; check:  sxtw x0, w0
-; nextln:  sxtw x2, w1
+; nextln:  sxtw x1, w1
-; nextln:  sdiv x0, x3, x2
+; nextln:  cbnz x1, 8 ; udf
-; nextln:  cbnz x2, 8 ; udf
+; nextln:  adds wzr, w1, #1
-; nextln:  adds wzr, w2, #1
+; nextln:  ccmp w0, #1, #nzcv, eq
 ; nextln:  ccmp w3, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
 ; nextln:  sdiv x0, x0, x1
 ; nextln:  ret
 function %f13(i32) -> i32 {
@@ -146,14 +139,8 @@ block0(v0: i32):
 }
 ; check:  sxtw x0, w0
-; nextln: movz x1, #2
+; nextln: orr x1, xzr, #2
-; nextln: sxtw x2, w1
+; nextln: sdiv x0, x0, x1
 ; nextln: sdiv x1, x0, x2
 ; nextln: cbnz x2, 8 ; udf
 ; nextln: adds wzr, w2, #1
 ; nextln: ccmp w0, #1, #nzcv, eq
 ; nextln: b.vc 8 ; udf
 ; nextln: mov x0, x1
 ; nextln: ret
 function %f14(i32, i32) -> i32 {
@@ -164,8 +151,8 @@ block0(v0: i32, v1: i32):
 ; check: mov w0, w0
 ; nextln: mov w1, w1
 ; nextln: udiv x0, x0, x1
 ; nextln: cbnz x1, 8 ; udf
 ; nextln: udiv x0, x0, x1
 ; nextln: ret
@@ -177,9 +164,8 @@ block0(v0: i32):
 }
 ; check:   mov w0, w0
-; nextln:  movz x1, #2
+; nextln:  orr x1, xzr, #2
 ; nextln:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  ret
 function %f16(i32, i32) -> i32 {
@@ -190,8 +176,8 @@ block0(v0: i32, v1: i32):
 ; check:  sxtw x0, w0
 ; nextln:  sxtw x1, w1
 ; nextln:  sdiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  sdiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret
@@ -203,8 +189,8 @@ block0(v0: i32, v1: i32):
 ; check:  mov w0, w0
 ; nextln:  mov w1, w1
 ; nextln:  udiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  udiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret
@@ -389,3 +375,40 @@ block0(v0: i32, v1: i32, v2: i32):
 ; check:  madd w0, w1, w2, w0
 ; nextln: ret
 function %srem_const (i64) -> i64 {
 block0(v0: i64):
  v1 = iconst.i64 2
  v2 = srem.i64 v0, v1
  return v2
 }
 ; check:   orr x1, xzr, #2
 ; nextln:  sdiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret
 function %urem_const (i64) -> i64 {
 block0(v0: i64):
  v1 = iconst.i64 2
  v2 = urem.i64 v0, v1
  return v2
 }
 ; check:   orr x1, xzr, #2
 ; nextln:  udiv x2, x0, x1
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  ret
 function %sdiv_minus_one(i64) -> i64 {
 block0(v0: i64):
  v1 = iconst.i64 -1
  v2 = sdiv.i64 v0, v1
  return v2
 }
 ; check:  movn x1, #0
 ; nextln:  adds xzr, x1, #1
 ; nextln:  ccmp x0, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
 ; nextln:  sdiv x0, x0, x1
 ; nextln:  ret