aarch64: Migrate {s,u}{div,rem} to ISLE (#3572)
* aarch64: Migrate {s,u}{div,rem} to ISLE
This commit migrates four different instructions to ISLE at once:
* `sdiv`
* `udiv`
* `srem`
* `urem`
These all share similar codegen and center around the `div` instruction
used internally. The main work was modeling the manual traps: the `div`
instruction doesn't trap on division by zero or signed overflow, so
explicit checks are required to adhere to the semantics of the
instructions themselves.
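To illustrate what the lowering has to model, here's a minimal sketch in
plain Rust of the required semantics (this function and its error
strings are made up for illustration, not code from this commit): the
AArch64 `div` family returns 0 for a zero divisor and wraps on
`i64::MIN / -1` rather than faulting, so explicit checks that branch to
a trap are needed.

    fn sdiv_with_traps(lhs: i64, rhs: i64) -> Result<i64, &'static str> {
        // Divide-by-zero check: the hardware would silently return 0 here.
        if rhs == 0 {
            return Err("integer division by zero");
        }
        // Signed-overflow check: the only overflowing case is i64::MIN / -1,
        // where the hardware would silently wrap back to i64::MIN.
        if lhs == i64::MIN && rhs == -1 {
            return Err("integer overflow");
        }
        Ok(lhs / rhs)
    }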
While I was here I also implemented an optimization for these
instructions when the right-hand side is a constant with a known value.
For `udiv`, `srem`, and `urem`, if the right-hand side is a nonzero
constant then the trap checks can be skipped entirely. For `sdiv`, if
the constant is neither 0 nor -1 then likewise all checks can be elided.
Finally, if the right-hand side of `sdiv` is -1 then the zero check is
elided, but a check for `i64::MIN` on the left-hand side is still
needed, and currently there's a TODO where `-1` is still checked too.
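A rough sketch of that decision logic in plain Rust (these helpers and
their names are illustrative only, not the actual ISLE rules):

    /// Whether a divide-by-zero trap check is still needed for a divisor
    /// known at compile time.
    fn needs_zero_check(divisor: u64) -> bool {
        divisor == 0
    }

    /// Whether `sdiv` still needs the signed-overflow check: only
    /// `i64::MIN / -1` can overflow, so any constant divisor other than -1
    /// lets both checks be dropped entirely.
    fn sdiv_needs_overflow_check(divisor: i64) -> bool {
        divisor == -1
    }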
* Rebasing and review conflicts
@@ -75,135 +75,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Umulhi | Opcode::Smulhi => implemented_in_isle(ctx),
 
-        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => {
-            let ty = ty.unwrap();
-
-            if ty.is_vector() || ty_bits(ty) > 64 {
-                return Err(CodegenError::Unsupported(format!(
-                    "{}: Unsupported type: {:?}",
-                    op, ty
-                )));
-            }
-
-            let is_signed = match op {
-                Opcode::Udiv | Opcode::Urem => false,
-                Opcode::Sdiv | Opcode::Srem => true,
-                _ => unreachable!(),
-            };
-            let is_rem = match op {
-                Opcode::Udiv | Opcode::Sdiv => false,
-                Opcode::Urem | Opcode::Srem => true,
-                _ => unreachable!(),
-            };
-            let narrow_mode = if is_signed {
-                NarrowValueMode::SignExtend64
-            } else {
-                NarrowValueMode::ZeroExtend64
-            };
-            // TODO: Add SDiv32 to implement 32-bit directly, rather
-            // than extending the input.
-            let div_op = if is_signed {
-                ALUOp::SDiv64
-            } else {
-                ALUOp::UDiv64
-            };
-
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-            let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
-            // The div instruction does not trap on divide by zero or signed overflow
-            // so checks are inserted below.
-            //
-            // div rd, rn, rm
-            ctx.emit(Inst::AluRRR {
-                alu_op: div_op,
-                rd,
-                rn,
-                rm,
-            });
-
-            if is_rem {
-                // Remainder (rn % rm) is implemented as:
-                //
-                // tmp = rn / rm
-                // rd = rn - (tmp*rm)
-                //
-                // use 'rd' for tmp and you have:
-                //
-                // div rd, rn, rm ; rd = rn / rm
-                // cbnz rm, #8 ; branch over trap
-                // udf ; divide by zero
-                // msub rd, rd, rm, rn ; rd = rn - rd * rm
-
-                // Check for divide by 0.
-                let trap_code = TrapCode::IntegerDivisionByZero;
-                ctx.emit(Inst::TrapIf {
-                    trap_code,
-                    kind: CondBrKind::Zero(rm),
-                });
-
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MSub64,
-                    rd,
-                    rn: rd.to_reg(),
-                    rm,
-                    ra: rn,
-                });
-            } else {
-                if div_op == ALUOp::SDiv64 {
-                    // cbnz rm, #8
-                    // udf ; divide by zero
-                    // cmn rm, 1
-                    // ccmp rn, 1, #nzcv, eq
-                    // b.vc #8
-                    // udf ; signed overflow
-
-                    // Check for divide by 0.
-                    let trap_code = TrapCode::IntegerDivisionByZero;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Zero(rm),
-                    });
-
-                    // Check for signed overflow. The only case is min_value / -1.
-                    // The following checks must be done in 32-bit or 64-bit, depending
-                    // on the input type. Even though the initial div instruction is
-                    // always done in 64-bit currently.
-                    let size = OperandSize::from_ty(ty);
-                    // Check RHS is -1.
-                    ctx.emit(Inst::AluRRImm12 {
-                        alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
-                        rd: writable_zero_reg(),
-                        rn: rm,
-                        imm12: Imm12::maybe_from_u64(1).unwrap(),
-                    });
-                    // Check LHS is min_value, by subtracting 1 and branching if
-                    // there is overflow.
-                    ctx.emit(Inst::CCmpImm {
-                        size,
-                        rn,
-                        imm: UImm5::maybe_from_u8(1).unwrap(),
-                        nzcv: NZCV::new(false, false, false, false),
-                        cond: Cond::Eq,
-                    });
-                    let trap_code = TrapCode::IntegerOverflow;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Cond(Cond::Vs),
-                    });
-                } else {
-                    // cbnz rm, #8
-                    // udf ; divide by zero
-
-                    // Check for divide by 0.
-                    let trap_code = TrapCode::IntegerDivisionByZero;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Zero(rm),
-                    });
-                }
-            }
-        }
+        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => implemented_in_isle(ctx),
 
         Opcode::Uextend | Opcode::Sextend => {
             let output_ty = ty.unwrap();