arm64: Implement checks in division / remainder

2020-04-24 11:39:39 +01:00
parent b6e6998713
commit f020f0812e
2 changed files with 229 additions and 18 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -917,6 +917,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
            } else {
                NarrowValueMode::ZeroExtend64
            };
            // TODO: Add SDiv32 to implement 32-bit directly, rather
            // than extending the input.
            let div_op = if is_signed {
                ALUOp::SDiv64
            } else {
@@ -925,16 +927,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
            let rd = output_to_reg(ctx, outputs[0]);
            let rn = input_to_reg(ctx, inputs[0], narrow_mode);
-            if !is_rem {
+            let rm = input_to_reg(ctx, inputs[1], narrow_mode);
-                let rm = input_to_reg(ctx, inputs[1], narrow_mode);
+            // The div instruction does not trap on divide by zero or signed overflow
-                ctx.emit(Inst::AluRRR {
+            // so checks are inserted below.
-                    alu_op: div_op,
+            //
-                    rd,
+            //   div rd, rn, rm
-                    rn,
+            ctx.emit(Inst::AluRRR {
-                    rm,
+                alu_op: div_op,
-                });
+                rd,
-            } else {
+                rn,
-                let rm = input_to_reg(ctx, inputs[1], narrow_mode);
+                rm,
            });
            if is_rem {
                // Remainder (rn % rm) is implemented as:
                //
                //   tmp = rn / rm
@@ -943,13 +948,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
                // use 'rd' for tmp and you have:
                //
                //   div rd, rn, rm       ; rd = rn / rm
                //   cbnz rm, #8          ; branch over trap
                //   udf                  ; divide by zero
                //   msub rd, rd, rm, rn  ; rd = rn - rd * rm
-                ctx.emit(Inst::AluRRR {
+
-                    alu_op: div_op,
+                // Check for divide by 0.
-                    rd,
+                let branch_size = 8;
-                    rn,
+                ctx.emit(Inst::CondBrLowered {
-                    rm,
+                    target: BranchTarget::ResolvedOffset(branch_size),
                    kind: CondBrKind::NotZero(rm),
                });
                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
                ctx.emit(Inst::Udf { trap_info });
                ctx.emit(Inst::AluRRRR {
                    alu_op: ALUOp::MSub64,
                    rd: rd,
@@ -957,6 +969,65 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
                    rm: rm,
                    ra: rn,
                });
            } else {
                if div_op == ALUOp::SDiv64 {
                    //   cbz rm, #20
                    //   cmn rm, 1
                    //   ccmp rn, 1, #nzcv, eq
                    //   b.vc 12
                    //   udf ; signed overflow
                    //   udf ; divide by zero
                    // Check for divide by 0.
                    let branch_size = 20;
                    ctx.emit(Inst::CondBrLowered {
                        target: BranchTarget::ResolvedOffset(branch_size),
                        kind: CondBrKind::Zero(rm),
                    });
                    // Check for signed overflow. The only case is min_value / -1.
                    let ty = ty.unwrap();
                    // The following checks must be done in 32-bit or 64-bit, depending
                    // on the input type, even though the initial div instruction is
                    // currently always done in 64-bit.
                    let size = InstSize::from_ty(ty);
                    // Check RHS is -1.
                    ctx.emit(Inst::AluRRImm12 {
                        alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
                        rd: writable_zero_reg(),
                        rn: rm,
                        imm12: Imm12::maybe_from_u64(1).unwrap(),
                    });
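                    // Check LHS is min_value, by subtracting 1 (only performed when the
                    // RHS check above set `eq`; otherwise the flags are set to the given
                    // nzcv value, which leaves V clear) and branching over the traps if
                    // there is no overflow.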
                    ctx.emit(Inst::CCmpImm {
                        size,
                        rn,
                        imm: UImm5::maybe_from_u8(1).unwrap(),
                        nzcv: NZCV::new(false, false, false, false),
                        cond: Cond::Eq,
                    });
                    ctx.emit(Inst::CondBrLowered {
                        target: BranchTarget::ResolvedOffset(12),
                        kind: CondBrKind::Cond(Cond::Vc),
                    });
                    let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
                    ctx.emit(Inst::Udf { trap_info });
                } else {
                    //   cbnz rm, #8
                    //   udf ; divide by zero
                    // Check for divide by 0.
                    let branch_size = 8;
                    ctx.emit(Inst::CondBrLowered {
                        target: BranchTarget::ResolvedOffset(branch_size),
                        kind: CondBrKind::NotZero(rm),
                    });
                }
                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
                ctx.emit(Inst::Udf { trap_info });
            }
        }
--- a/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
@@ -75,7 +75,14 @@ block0(v0: i64, v1: i64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
-; nextln:  sdiv x0, x0, x1
+; nextln:  sdiv x2, x0, x1
 ; nextln:  cbz x1, 20
 ; nextln:  adds xzr, x1, #1
 ; nextln:  ccmp x0, #1, #nzcv, eq
 ; nextln:  b.vc 12
 ; nextln:  udf
 ; nextln:  udf
 ; nextln:  mov x0, x2
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
@@ -89,8 +96,15 @@ block0(v0: i64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
-; nextln:  movz x1, #2
+; nextln:  movz x2, #2
-; nextln:  sdiv x0, x0, x1
+; nextln:  sdiv x1, x0, x2
 ; nextln:  cbz x2, 20
 ; nextln:  adds xzr, x2, #1
 ; nextln:  ccmp x0, #1, #nzcv, eq
 ; nextln:  b.vc 12
 ; nextln:  udf
 ; nextln:  udf
 ; nextln:  mov x0, x1
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
@@ -104,6 +118,8 @@ block0(v0: i64, v1: i64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8
 ; nextln:  udf
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
@@ -119,6 +135,8 @@ block0(v0: i64):
 ; nextln:  mov fp, sp
 ; nextln:  movz x1, #2
 ; nextln:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8
 ; nextln:  udf
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
@@ -132,6 +150,8 @@ block0(v0: i64, v1: i64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  sdiv x2, x0, x1
 ; nextln:  cbnz x1, 8
 ; nextln:  udf
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -146,6 +166,126 @@ block0(v0: i64, v1: i64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  udiv x2, x0, x1
 ; nextln:  cbnz x1, 8
 ; nextln:  udf
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 function %f(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
  v2 = sdiv.i32 v0, v1
  return v2
 }
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  sxtw x3, w0
 ; nextln:  sxtw x2, w1
 ; nextln:  sdiv x0, x3, x2
 ; nextln:  cbz x2, 20
 ; nextln:  adds wzr, w2, #1
 ; nextln:  ccmp w3, #1, #nzcv, eq
 ; nextln:  b.vc 12
 ; nextln:  udf
 ; nextln:  udf
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 function %f(i32) -> i32 {
 block0(v0: i32):
  v1 = iconst.i32 2
  v2 = sdiv.i32 v0, v1
  return v2
 }
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  mov x1, x0
 ; nextln:  movz x0, #2
 ; nextln:  sxtw x1, w1
 ; nextln:  sxtw x2, w0
 ; nextln:  sdiv x0, x1, x2
 ; nextln:  cbz x2, 20
 ; nextln:  adds wzr, w2, #1
 ; nextln:  ccmp w1, #1, #nzcv, eq
 ; nextln:  b.vc 12
 ; nextln:  udf
 ; nextln:  udf
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 function %f(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
  v2 = udiv.i32 v0, v1
  return v2
 }
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  mov w0, w0
 ; nextln:  mov w1, w1
 ; nextln:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8
 ; nextln:  udf
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 function %f(i32) -> i32 {
 block0(v0: i32):
  v1 = iconst.i32 2
  v2 = udiv.i32 v0, v1
  return v2
 }
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  movz x1, #2
 ; nextln:  mov w0, w0
 ; nextln:  mov w1, w1
 ; nextln:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8
 ; nextln:  udf
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 function %f(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
  v2 = srem.i32 v0, v1
  return v2
 }
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  sxtw x0, w0
 ; nextln:  sxtw x1, w1
 ; nextln:  sdiv x2, x0, x1
 ; nextln:  cbnz x1, 8
 ; nextln:  udf
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 function %f(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
  v2 = urem.i32 v0, v1
  return v2
 }
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  mov w0, w0
 ; nextln:  mov w1, w1
 ; nextln:  udiv x2, x0, x1
 ; nextln:  cbnz x1, 8
 ; nextln:  udf
 ; nextln:  msub x0, x2, x1, x0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16