arm64: Implement checks in division / remainder
This implements the divide-by-zero and signed-overflow checks that Wasm specifies. Copyright (c) 2020, Arm Limited.
This commit is contained in:
committed by
Benjamin Bouvier
parent
b6e6998713
commit
f020f0812e
@@ -917,6 +917,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
|||||||
} else {
|
} else {
|
||||||
NarrowValueMode::ZeroExtend64
|
NarrowValueMode::ZeroExtend64
|
||||||
};
|
};
|
||||||
|
// TODO: Add SDiv32 to implement 32-bit directly, rather
|
||||||
|
// than extending the input.
|
||||||
let div_op = if is_signed {
|
let div_op = if is_signed {
|
||||||
ALUOp::SDiv64
|
ALUOp::SDiv64
|
||||||
} else {
|
} else {
|
||||||
@@ -925,16 +927,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
|||||||
|
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||||
if !is_rem {
|
let rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||||
let rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
// The div instruction does not trap on divide by zero or signed overflow
|
||||||
ctx.emit(Inst::AluRRR {
|
// so checks are inserted below.
|
||||||
alu_op: div_op,
|
//
|
||||||
rd,
|
// div rd, rn, rm
|
||||||
rn,
|
ctx.emit(Inst::AluRRR {
|
||||||
rm,
|
alu_op: div_op,
|
||||||
});
|
rd,
|
||||||
} else {
|
rn,
|
||||||
let rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
rm,
|
||||||
|
});
|
||||||
|
|
||||||
|
if is_rem {
|
||||||
// Remainder (rn % rm) is implemented as:
|
// Remainder (rn % rm) is implemented as:
|
||||||
//
|
//
|
||||||
// tmp = rn / rm
|
// tmp = rn / rm
|
||||||
@@ -943,13 +948,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
|||||||
// use 'rd' for tmp and you have:
|
// use 'rd' for tmp and you have:
|
||||||
//
|
//
|
||||||
// div rd, rn, rm ; rd = rn / rm
|
// div rd, rn, rm ; rd = rn / rm
|
||||||
|
// cbnz rm, #8 ; branch over trap
|
||||||
|
// udf ; divide by zero
|
||||||
// msub rd, rd, rm, rn ; rd = rn - rd * rm
|
// msub rd, rd, rm, rn ; rd = rn - rd * rm
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op: div_op,
|
// Check for divide by 0.
|
||||||
rd,
|
let branch_size = 8;
|
||||||
rn,
|
ctx.emit(Inst::CondBrLowered {
|
||||||
rm,
|
target: BranchTarget::ResolvedOffset(branch_size),
|
||||||
|
kind: CondBrKind::NotZero(rm),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
|
||||||
|
ctx.emit(Inst::Udf { trap_info });
|
||||||
|
|
||||||
ctx.emit(Inst::AluRRRR {
|
ctx.emit(Inst::AluRRRR {
|
||||||
alu_op: ALUOp::MSub64,
|
alu_op: ALUOp::MSub64,
|
||||||
rd: rd,
|
rd: rd,
|
||||||
@@ -957,6 +969,65 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
|||||||
rm: rm,
|
rm: rm,
|
||||||
ra: rn,
|
ra: rn,
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
if div_op == ALUOp::SDiv64 {
|
||||||
|
// cbz rm, #20
|
||||||
|
// cmn rm, 1
|
||||||
|
// ccmp rn, 1, #nzcv, eq
|
||||||
|
// b.vc 12
|
||||||
|
// udf ; signed overflow
|
||||||
|
// udf ; divide by zero
|
||||||
|
|
||||||
|
// Check for divide by 0.
|
||||||
|
let branch_size = 20;
|
||||||
|
ctx.emit(Inst::CondBrLowered {
|
||||||
|
target: BranchTarget::ResolvedOffset(branch_size),
|
||||||
|
kind: CondBrKind::Zero(rm),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Check for signed overflow. The only case is min_value / -1.
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
// The following checks must be done in 32-bit or 64-bit, depending
|
||||||
|
// on the input type, even though the initial div instruction is
|
||||||
|
// always done in 64-bit currently.
|
||||||
|
let size = InstSize::from_ty(ty);
|
||||||
|
// Check RHS is -1.
|
||||||
|
ctx.emit(Inst::AluRRImm12 {
|
||||||
|
alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
|
||||||
|
rd: writable_zero_reg(),
|
||||||
|
rn: rm,
|
||||||
|
imm12: Imm12::maybe_from_u64(1).unwrap(),
|
||||||
|
});
|
||||||
|
// Check LHS is min_value, by subtracting 1 and branching if
|
||||||
|
// there is no overflow (b.vc skips the overflow trap).
|
||||||
|
ctx.emit(Inst::CCmpImm {
|
||||||
|
size,
|
||||||
|
rn,
|
||||||
|
imm: UImm5::maybe_from_u8(1).unwrap(),
|
||||||
|
nzcv: NZCV::new(false, false, false, false),
|
||||||
|
cond: Cond::Eq,
|
||||||
|
});
|
||||||
|
ctx.emit(Inst::CondBrLowered {
|
||||||
|
target: BranchTarget::ResolvedOffset(12),
|
||||||
|
kind: CondBrKind::Cond(Cond::Vc),
|
||||||
|
});
|
||||||
|
|
||||||
|
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
|
||||||
|
ctx.emit(Inst::Udf { trap_info });
|
||||||
|
} else {
|
||||||
|
// cbnz rm, #8
|
||||||
|
// udf ; divide by zero
|
||||||
|
|
||||||
|
// Check for divide by 0.
|
||||||
|
let branch_size = 8;
|
||||||
|
ctx.emit(Inst::CondBrLowered {
|
||||||
|
target: BranchTarget::ResolvedOffset(branch_size),
|
||||||
|
kind: CondBrKind::NotZero(rm),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
|
||||||
|
ctx.emit(Inst::Udf { trap_info });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -75,7 +75,14 @@ block0(v0: i64, v1: i64):
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: sdiv x0, x0, x1
|
; nextln: sdiv x2, x0, x1
|
||||||
|
; nextln: cbz x1, 20
|
||||||
|
; nextln: adds xzr, x1, #1
|
||||||
|
; nextln: ccmp x0, #1, #nzcv, eq
|
||||||
|
; nextln: b.vc 12
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: mov x0, x2
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
@@ -89,8 +96,15 @@ block0(v0: i64):
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: movz x1, #2
|
; nextln: movz x2, #2
|
||||||
; nextln: sdiv x0, x0, x1
|
; nextln: sdiv x1, x0, x2
|
||||||
|
; nextln: cbz x2, 20
|
||||||
|
; nextln: adds xzr, x2, #1
|
||||||
|
; nextln: ccmp x0, #1, #nzcv, eq
|
||||||
|
; nextln: b.vc 12
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: mov x0, x1
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
@@ -104,6 +118,8 @@ block0(v0: i64, v1: i64):
|
|||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: udiv x0, x0, x1
|
; nextln: udiv x0, x0, x1
|
||||||
|
; nextln: cbnz x1, 8
|
||||||
|
; nextln: udf
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
@@ -119,6 +135,8 @@ block0(v0: i64):
|
|||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: movz x1, #2
|
; nextln: movz x1, #2
|
||||||
; nextln: udiv x0, x0, x1
|
; nextln: udiv x0, x0, x1
|
||||||
|
; nextln: cbnz x1, 8
|
||||||
|
; nextln: udf
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
@@ -132,6 +150,8 @@ block0(v0: i64, v1: i64):
|
|||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: sdiv x2, x0, x1
|
; nextln: sdiv x2, x0, x1
|
||||||
|
; nextln: cbnz x1, 8
|
||||||
|
; nextln: udf
|
||||||
; nextln: msub x0, x2, x1, x0
|
; nextln: msub x0, x2, x1, x0
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
@@ -146,6 +166,126 @@ block0(v0: i64, v1: i64):
|
|||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: udiv x2, x0, x1
|
; nextln: udiv x2, x0, x1
|
||||||
|
; nextln: cbnz x1, 8
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: msub x0, x2, x1, x0
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %f(i32, i32) -> i32 {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = sdiv.i32 v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: sxtw x3, w0
|
||||||
|
; nextln: sxtw x2, w1
|
||||||
|
; nextln: sdiv x0, x3, x2
|
||||||
|
; nextln: cbz x2, 20
|
||||||
|
; nextln: adds wzr, w2, #1
|
||||||
|
; nextln: ccmp w3, #1, #nzcv, eq
|
||||||
|
; nextln: b.vc 12
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f(i32) -> i32 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = iconst.i32 2
|
||||||
|
v2 = sdiv.i32 v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov x1, x0
|
||||||
|
; nextln: movz x0, #2
|
||||||
|
; nextln: sxtw x1, w1
|
||||||
|
; nextln: sxtw x2, w0
|
||||||
|
; nextln: sdiv x0, x1, x2
|
||||||
|
; nextln: cbz x2, 20
|
||||||
|
; nextln: adds wzr, w2, #1
|
||||||
|
; nextln: ccmp w1, #1, #nzcv, eq
|
||||||
|
; nextln: b.vc 12
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f(i32, i32) -> i32 {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = udiv.i32 v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov w0, w0
|
||||||
|
; nextln: mov w1, w1
|
||||||
|
; nextln: udiv x0, x0, x1
|
||||||
|
; nextln: cbnz x1, 8
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f(i32) -> i32 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = iconst.i32 2
|
||||||
|
v2 = udiv.i32 v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: movz x1, #2
|
||||||
|
; nextln: mov w0, w0
|
||||||
|
; nextln: mov w1, w1
|
||||||
|
; nextln: udiv x0, x0, x1
|
||||||
|
; nextln: cbnz x1, 8
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f(i32, i32) -> i32 {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = srem.i32 v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: sxtw x0, w0
|
||||||
|
; nextln: sxtw x1, w1
|
||||||
|
; nextln: sdiv x2, x0, x1
|
||||||
|
; nextln: cbnz x1, 8
|
||||||
|
; nextln: udf
|
||||||
|
; nextln: msub x0, x2, x1, x0
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %f(i32, i32) -> i32 {
|
||||||
|
block0(v0: i32, v1: i32):
|
||||||
|
v2 = urem.i32 v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov w0, w0
|
||||||
|
; nextln: mov w1, w1
|
||||||
|
; nextln: udiv x2, x0, x1
|
||||||
|
; nextln: cbnz x1, 8
|
||||||
|
; nextln: udf
|
||||||
; nextln: msub x0, x2, x1, x0
|
; nextln: msub x0, x2, x1, x0
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
|||||||
Reference in New Issue
Block a user