arm64: Implement checks in division / remainder

This implements the divide-by-zero and signed-overflow checks that Wasm
specifies.

Copyright (c) 2020, Arm Limited.
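
For reference, the semantics being enforced: Wasm integer division traps on a
zero divisor, and signed division additionally traps on the one quotient that
does not fit, min_value / -1. A minimal Rust sketch of those rules
(illustrative only, not code from this commit):

    fn wasm_div_s(lhs: i32, rhs: i32) -> Result<i32, &'static str> {
        if rhs == 0 {
            return Err("integer divide by zero"); // divide-by-zero trap
        }
        if lhs == i32::MIN && rhs == -1 {
            return Err("integer overflow"); // i32::MIN / -1 does not fit in i32
        }
        Ok(lhs.wrapping_div(rhs)) // cannot overflow or panic after the checks
    }

    fn main() {
        assert_eq!(wasm_div_s(-7, 2), Ok(-3)); // truncating division
        assert!(wasm_div_s(1, 0).is_err());
        assert!(wasm_div_s(i32::MIN, -1).is_err());
    }
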
Joey Gouly authored on 2020-04-24 11:39:39 +01:00
Committed by Benjamin Bouvier
parent b6e6998713
commit f020f0812e
2 changed files with 229 additions and 18 deletions


@@ -917,6 +917,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
 } else {
     NarrowValueMode::ZeroExtend64
 };
+// TODO: Add SDiv32 to implement 32-bit directly, rather
+// than extending the input.
 let div_op = if is_signed {
     ALUOp::SDiv64
 } else {
@@ -925,16 +927,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
 let rd = output_to_reg(ctx, outputs[0]);
 let rn = input_to_reg(ctx, inputs[0], narrow_mode);
-if !is_rem {
-    let rm = input_to_reg(ctx, inputs[1], narrow_mode);
-    // div rd, rn, rm
-    ctx.emit(Inst::AluRRR {
-        alu_op: div_op,
-        rd,
-        rn,
-        rm,
-    });
-} else {
-    let rm = input_to_reg(ctx, inputs[1], narrow_mode);
+let rm = input_to_reg(ctx, inputs[1], narrow_mode);
+// The div instruction does not trap on divide by zero or signed overflow
+// so checks are inserted below.
+//
+// div rd, rn, rm
+ctx.emit(Inst::AluRRR {
+    alu_op: div_op,
+    rd,
+    rn,
+    rm,
+});
+
+if is_rem {
     // Remainder (rn % rm) is implemented as:
     //
     //   tmp = rn / rm
@@ -943,13 +948,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
     // use 'rd' for tmp and you have:
     //
     //   div rd, rn, rm       ; rd = rn / rm
+    //   cbnz rm, #8          ; branch over trap
+    //   udf                  ; divide by zero
     //   msub rd, rd, rm, rn  ; rd = rn - rd * rm
-    ctx.emit(Inst::AluRRR {
-        alu_op: div_op,
-        rd,
-        rn,
-        rm,
-    });
+
+    // Check for divide by 0.
+    let branch_size = 8;
+    ctx.emit(Inst::CondBrLowered {
+        target: BranchTarget::ResolvedOffset(branch_size),
+        kind: CondBrKind::NotZero(rm),
+    });
+
+    let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
+    ctx.emit(Inst::Udf { trap_info });
+
     ctx.emit(Inst::AluRRRR {
         alu_op: ALUOp::MSub64,
         rd: rd,
@@ -957,6 +969,65 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
         rm: rm,
         ra: rn,
     });
+} else {
+    if div_op == ALUOp::SDiv64 {
+        //   cbz rm, #20
+        //   cmn rm, 1
+        //   ccmp rn, 1, #nzcv, eq
+        //   b.vc 12
+        //   udf ; signed overflow
+        //   udf ; divide by zero
+
+        // Check for divide by 0.
+        let branch_size = 20;
+        ctx.emit(Inst::CondBrLowered {
+            target: BranchTarget::ResolvedOffset(branch_size),
+            kind: CondBrKind::Zero(rm),
+        });
+
+        // Check for signed overflow. The only case is min_value / -1.
+        let ty = ty.unwrap();
+        // The following checks must be done in 32-bit or 64-bit, depending
+        // on the input type, even though the initial div instruction is
+        // currently always done in 64-bit.
+        let size = InstSize::from_ty(ty);
+        // Check whether the RHS is -1.
+        ctx.emit(Inst::AluRRImm12 {
+            alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
+            rd: writable_zero_reg(),
+            rn: rm,
+            imm12: Imm12::maybe_from_u64(1).unwrap(),
+        });
+        // Check whether the LHS is min_value, by subtracting 1 and
+        // branching if there is overflow.
+        ctx.emit(Inst::CCmpImm {
+            size,
+            rn,
+            imm: UImm5::maybe_from_u8(1).unwrap(),
+            nzcv: NZCV::new(false, false, false, false),
+            cond: Cond::Eq,
+        });
+        ctx.emit(Inst::CondBrLowered {
+            target: BranchTarget::ResolvedOffset(12),
+            kind: CondBrKind::Cond(Cond::Vc),
+        });
+
+        let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
+        ctx.emit(Inst::Udf { trap_info });
+    } else {
+        //   cbnz rm, #8
+        //   udf ; divide by zero
+
+        // Check for divide by 0.
+        let branch_size = 8;
+        ctx.emit(Inst::CondBrLowered {
+            target: BranchTarget::ResolvedOffset(branch_size),
+            kind: CondBrKind::NotZero(rm),
+        });
+    }
+
+    let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
+    ctx.emit(Inst::Udf { trap_info });
 }
 }
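
Two facts make this lowering work, and both are easy to sanity-check in plain
Rust (a standalone sketch with illustrative names, not code from this commit).
First, the truncating remainder falls out of one divide and one
multiply-subtract, which is exactly what the emitted msub computes. Second,
the adds/ccmp/b.vc sequence traps precisely when the RHS is -1 (rm + 1 wraps
to zero, setting Z) and the LHS is min_value (rn - 1 overflows, setting V):

    // rem via msub: msub rd, rd, rm, rn computes rn - rd * rm.
    fn rem_via_msub(rn: i64, rm: i64) -> i64 {
        let tmp = rn / rm; // div rd, rn, rm
        rn - tmp * rm      // msub rd, rd, rm, rn
    }

    // The condition the flag sequence isolates: min_value / -1.
    fn sdiv_overflows(rn: i64, rm: i64) -> bool {
        let rhs_is_minus_one = rm.wrapping_add(1) == 0; // adds xzr, rm, #1 (Z)
        let lhs_is_min = rn.overflowing_sub(1).1;       // ccmp rn, #1 (V)
        rhs_is_minus_one && lhs_is_min
    }

    fn main() {
        assert_eq!(rem_via_msub(7, 3), 7 % 3);
        assert_eq!(rem_via_msub(-7, 3), -7 % 3);
        assert!(sdiv_overflows(i64::MIN, -1));
        assert!(!sdiv_overflows(i64::MIN, 2));
        assert!(!sdiv_overflows(-1, -1));
    }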


@@ -75,7 +75,14 @@ block0(v0: i64, v1: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: sdiv x0, x0, x1
+; nextln: sdiv x2, x0, x1
+; nextln: cbz x1, 20
+; nextln: adds xzr, x1, #1
+; nextln: ccmp x0, #1, #nzcv, eq
+; nextln: b.vc 12
+; nextln: udf
+; nextln: udf
+; nextln: mov x0, x2
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -89,8 +96,15 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: movz x1, #2
-; nextln: sdiv x0, x0, x1
+; nextln: movz x2, #2
+; nextln: sdiv x1, x0, x2
+; nextln: cbz x2, 20
+; nextln: adds xzr, x2, #1
+; nextln: ccmp x0, #1, #nzcv, eq
+; nextln: b.vc 12
+; nextln: udf
+; nextln: udf
+; nextln: mov x0, x1
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -104,6 +118,8 @@ block0(v0: i64, v1: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: udiv x0, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -119,6 +135,8 @@ block0(v0: i64):
 ; nextln: mov fp, sp
 ; nextln: movz x1, #2
 ; nextln: udiv x0, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -132,6 +150,8 @@ block0(v0: i64, v1: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: sdiv x2, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
 ; nextln: msub x0, x2, x1, x0
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
@@ -146,6 +166,126 @@ block0(v0: i64, v1: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: udiv x2, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
 ; nextln: msub x0, x2, x1, x0
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+function %f(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = sdiv.i32 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sxtw x3, w0
+; nextln: sxtw x2, w1
+; nextln: sdiv x0, x3, x2
+; nextln: cbz x2, 20
+; nextln: adds wzr, w2, #1
+; nextln: ccmp w3, #1, #nzcv, eq
+; nextln: b.vc 12
+; nextln: udf
+; nextln: udf
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = sdiv.i32 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov x1, x0
+; nextln: movz x0, #2
+; nextln: sxtw x1, w1
+; nextln: sxtw x2, w0
+; nextln: sdiv x0, x1, x2
+; nextln: cbz x2, 20
+; nextln: adds wzr, w2, #1
+; nextln: ccmp w1, #1, #nzcv, eq
+; nextln: b.vc 12
+; nextln: udf
+; nextln: udf
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = udiv.i32 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov w0, w0
+; nextln: mov w1, w1
+; nextln: udiv x0, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = udiv.i32 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x1, #2
+; nextln: mov w0, w0
+; nextln: mov w1, w1
+; nextln: udiv x0, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = srem.i32 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sxtw x0, w0
+; nextln: sxtw x1, w1
+; nextln: sdiv x2, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
+; nextln: msub x0, x2, x1, x0
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = urem.i32 v0, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov w0, w0
+; nextln: mov w1, w1
+; nextln: udiv x2, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
+; nextln: msub x0, x2, x1, x0
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
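
The hard-coded branch offsets in these expectations follow from AArch64's
fixed 4-byte instruction encoding, with the offset measured from the branch
itself; a quick arithmetic check (the layout assumptions are spelled out in
the comments, this is not code from the commit):

    const INSN_SIZE: i32 = 4; // every AArch64 instruction is 4 bytes

    fn main() {
        // cbz rm, 20: a zero divisor skips adds, ccmp, b.vc and the
        // overflow udf (four slots), landing on the divide-by-zero udf.
        assert_eq!(5 * INSN_SIZE, 20);
        // b.vc 12: no overflow skips the two udf slots that follow.
        assert_eq!(3 * INSN_SIZE, 12);
        // cbnz rm, 8: a non-zero divisor skips the single udf.
        assert_eq!(2 * INSN_SIZE, 8);
    }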