From f020f0812e4b3dbab75915cdf723eaf212577fee Mon Sep 17 00:00:00 2001
From: Joey Gouly
Date: Fri, 24 Apr 2020 11:39:39 +0100
Subject: [PATCH] arm64: Implement checks in division / remainder

This implements the divide by 0 and signed overflow checks that Wasm
specifies.

Copyright (c) 2020, Arm Limited.
---
 cranelift/codegen/src/isa/aarch64/lower.rs    | 101 ++++++++++--
 .../filetests/vcode/aarch64/arithmetic.clif   | 146 +++++++++++++++++-
 2 files changed, 229 insertions(+), 18 deletions(-)

diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 56962d9110..e9e72dd6d5 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -917,6 +917,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
             } else {
                 NarrowValueMode::ZeroExtend64
             };
+            // TODO: Add SDiv32 to implement 32-bit directly, rather
+            // than extending the input.
             let div_op = if is_signed {
                 ALUOp::SDiv64
             } else {
@@ -925,16 +927,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
 
             let rd = output_to_reg(ctx, outputs[0]);
             let rn = input_to_reg(ctx, inputs[0], narrow_mode);
-            if !is_rem {
-                let rm = input_to_reg(ctx, inputs[1], narrow_mode);
-                ctx.emit(Inst::AluRRR {
-                    alu_op: div_op,
-                    rd,
-                    rn,
-                    rm,
-                });
-            } else {
-                let rm = input_to_reg(ctx, inputs[1], narrow_mode);
+            let rm = input_to_reg(ctx, inputs[1], narrow_mode);
+            // The div instruction does not trap on divide by zero or signed overflow
+            // so checks are inserted below.
+            //
+            //   div rd, rn, rm
+            ctx.emit(Inst::AluRRR {
+                alu_op: div_op,
+                rd,
+                rn,
+                rm,
+            });
+
+            if is_rem {
                 // Remainder (rn % rm) is implemented as:
                 //
                 //   tmp = rn / rm
@@ -943,13 +948,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
                 // use 'rd' for tmp and you have:
                 //
                 //   div rd, rn, rm       ; rd = rn / rm
+                //   cbnz rm, #8          ; branch over trap
+                //   udf                  ; divide by zero
                 //   msub rd, rd, rm, rn  ; rd = rn - rd * rm
-                ctx.emit(Inst::AluRRR {
-                    alu_op: div_op,
-                    rd,
-                    rn,
-                    rm,
+
+                // Check for divide by 0.
+                let branch_size = 8;
+                ctx.emit(Inst::CondBrLowered {
+                    target: BranchTarget::ResolvedOffset(branch_size),
+                    kind: CondBrKind::NotZero(rm),
                 });
+
+                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
+                ctx.emit(Inst::Udf { trap_info });
+
                 ctx.emit(Inst::AluRRRR {
                     alu_op: ALUOp::MSub64,
                     rd: rd,
@@ -957,6 +969,65 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
                     rm: rm,
                     ra: rn,
                 });
+            } else {
+                if div_op == ALUOp::SDiv64 {
+                    //   cbz rm, #20
+                    //   cmn rm, 1
+                    //   ccmp rn, 1, #nzcv, eq
+                    //   b.vc 12
+                    //   udf ; signed overflow
+                    //   udf ; divide by zero
+
+                    // Check for divide by 0.
+                    let branch_size = 20;
+                    ctx.emit(Inst::CondBrLowered {
+                        target: BranchTarget::ResolvedOffset(branch_size),
+                        kind: CondBrKind::Zero(rm),
+                    });
+
+                    // Check for signed overflow. The only case is min_value / -1.
+                    let ty = ty.unwrap();
+                    // The following checks must be done in 32-bit or 64-bit, depending
+                    // on the input type. Even though the initial div instruction is
+                    // always done in 64-bit currently.
+                    let size = InstSize::from_ty(ty);
+                    // Check RHS is -1.
+                    ctx.emit(Inst::AluRRImm12 {
+                        alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
+                        rd: writable_zero_reg(),
+                        rn: rm,
+                        imm12: Imm12::maybe_from_u64(1).unwrap(),
+                    });
+                    // Check LHS is min_value, by subtracting 1 and branching if
+                    // there is overflow.
+                    ctx.emit(Inst::CCmpImm {
+                        size,
+                        rn,
+                        imm: UImm5::maybe_from_u8(1).unwrap(),
+                        nzcv: NZCV::new(false, false, false, false),
+                        cond: Cond::Eq,
+                    });
+                    ctx.emit(Inst::CondBrLowered {
+                        target: BranchTarget::ResolvedOffset(12),
+                        kind: CondBrKind::Cond(Cond::Vc),
+                    });
+
+                    let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
+                    ctx.emit(Inst::Udf { trap_info });
+                } else {
+                    //   cbnz rm, #8
+                    //   udf ; divide by zero
+
+                    // Check for divide by 0.
+                    let branch_size = 8;
+                    ctx.emit(Inst::CondBrLowered {
+                        target: BranchTarget::ResolvedOffset(branch_size),
+                        kind: CondBrKind::NotZero(rm),
+                    });
+                }
+
+                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerDivisionByZero);
+                ctx.emit(Inst::Udf { trap_info });
             }
         }
 
diff --git a/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif b/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
index 1f6dcf6b82..08ecb31d35 100644
--- a/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/arithmetic.clif
@@ -75,7 +75,14 @@ block0(v0: i64, v1: i64):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: sdiv x0, x0, x1
+; nextln: sdiv x2, x0, x1
+; nextln: cbz x1, 20
+; nextln: adds xzr, x1, #1
+; nextln: ccmp x0, #1, #nzcv, eq
+; nextln: b.vc 12
+; nextln: udf
+; nextln: udf
+; nextln: mov x0, x2
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -89,8 +96,15 @@ block0(v0: i64):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: movz x1, #2
-; nextln: sdiv x0, x0, x1
+; nextln: movz x2, #2
+; nextln: sdiv x1, x0, x2
+; nextln: cbz x2, 20
+; nextln: adds xzr, x2, #1
+; nextln: ccmp x0, #1, #nzcv, eq
+; nextln: b.vc 12
+; nextln: udf
+; nextln: udf
+; nextln: mov x0, x1
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -104,6 +118,8 @@ block0(v0: i64, v1: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: udiv x0, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -119,6 +135,8 @@ block0(v0: i64):
 ; nextln: mov fp, sp
 ; nextln: movz x1, #2
 ; nextln: udiv x0, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -132,6 +150,8 @@ block0(v0: i64, v1: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: sdiv x2, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
 ; nextln: msub x0, x2, x1, x0
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
@@ -146,6 +166,126 @@ block0(v0: i64, v1: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
 ; nextln: udiv x2, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
+; nextln: msub x0, x2, x1, x0
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+
+function %f(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = sdiv.i32 v0, v1
+  return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sxtw x3, w0
+; nextln: sxtw x2, w1
+; nextln: sdiv x0, x3, x2
+; nextln: cbz x2, 20
+; nextln: adds wzr, w2, #1
+; nextln: ccmp w3, #1, #nzcv, eq
+; nextln: b.vc 12
+; nextln: udf
+; nextln: udf
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32) -> i32 {
+block0(v0: i32):
+  v1 = iconst.i32 2
+  v2 = sdiv.i32 v0, v1
+  return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov x1, x0
+; nextln: movz x0, #2
+; nextln: sxtw x1, w1
+; nextln: sxtw x2, w0
+; nextln: sdiv x0, x1, x2
+; nextln: cbz x2, 20
+; nextln: adds wzr, w2, #1
+; nextln: ccmp w1, #1, #nzcv, eq
+; nextln: b.vc 12
+; nextln: udf
+; nextln: udf
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = udiv.i32 v0, v1
+  return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov w0, w0
+; nextln: mov w1, w1
+; nextln: udiv x0, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32) -> i32 {
+block0(v0: i32):
+  v1 = iconst.i32 2
+  v2 = udiv.i32 v0, v1
+  return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x1, #2
+; nextln: mov w0, w0
+; nextln: mov w1, w1
+; nextln: udiv x0, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = srem.i32 v0, v1
+  return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sxtw x0, w0
+; nextln: sxtw x1, w1
+; nextln: sdiv x2, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
+; nextln: msub x0, x2, x1, x0
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = urem.i32 v0, v1
+  return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov w0, w0
+; nextln: mov w1, w1
+; nextln: udiv x2, x0, x1
+; nextln: cbnz x1, 8
+; nextln: udf
 ; nextln: msub x0, x2, x1, x0
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
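
The sketch below is not part of the patch; it is a minimal, hypothetical Rust illustration of the trap semantics the new lowering encodes for signed division: a trap on division by zero (the cbz/cbnz over udf sequence) and a trap on signed overflow, whose only case is min_value / -1 (the cmn, ccmp, b.vc over udf sequence). The helper name sdiv64_checked is invented for this example.

    // Illustrative only: mirrors the two guard conditions emitted before the
    // hardware sdiv result is used (the sdiv instruction itself never traps).
    fn sdiv64_checked(rn: i64, rm: i64) -> Result<i64, &'static str> {
        if rm == 0 {
            // Lowered as a cbz/cbnz over a udf (TrapCode::IntegerDivisionByZero).
            return Err("integer division by zero");
        }
        if rn == i64::MIN && rm == -1 {
            // The only overflowing case; lowered as cmn rm, 1 / ccmp rn, 1 /
            // b.vc over a udf (TrapCode::IntegerOverflow).
            return Err("integer overflow");
        }
        Ok(rn / rm)
    }

    fn main() {
        assert_eq!(sdiv64_checked(7, -2), Ok(-3));
        assert!(sdiv64_checked(1, 0).is_err());
        assert!(sdiv64_checked(i64::MIN, -1).is_err());
    }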