aarch64: Migrate {s,u}{div,rem} to ISLE (#3572)
* aarch64: Migrate {s,u}{div,rem} to ISLE
This commit migrates four different instructions to ISLE at once:
* `sdiv`
* `udiv`
* `srem`
* `urem`
These all share similar codegen and center around the `div` instruction
used internally. The main work was modeling the manual traps: the `div`
instruction doesn't trap on division by zero or signed overflow, so
explicit checks are required to adhere to the semantics of the
instructions themselves.
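To illustrate what the lowering has to model, here's a minimal sketch in
plain Rust of the required semantics (this function and its error
strings are made up for illustration, not code from this commit): the
AArch64 `div` family returns 0 for a zero divisor and wraps on
`i64::MIN / -1` rather than faulting, so explicit checks that branch to
a trap are needed.

    fn sdiv_with_traps(lhs: i64, rhs: i64) -> Result<i64, &'static str> {
        // Divide-by-zero check: the hardware would silently return 0 here.
        if rhs == 0 {
            return Err("integer division by zero");
        }
        // Signed-overflow check: the only overflowing case is i64::MIN / -1,
        // where the hardware would silently wrap back to i64::MIN.
        if lhs == i64::MIN && rhs == -1 {
            return Err("integer overflow");
        }
        Ok(lhs / rhs)
    }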
While I was here I also implemented an optimization for these
instructions when the right-hand side is a constant with a known value.
For `udiv`, `srem`, and `urem`, if the right-hand side is a nonzero
constant then the trap checks can be skipped entirely. For `sdiv`, if
the constant is neither 0 nor -1 then likewise all checks can be elided.
Finally, if the right-hand side of `sdiv` is -1 then the zero check is
elided, but a check for `i64::MIN` on the left-hand side is still
needed, and currently there's a TODO where `-1` is still checked too.
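A rough sketch of that decision logic in plain Rust (these helpers and
their names are illustrative only, not the actual ISLE rules):

    /// Whether a divide-by-zero trap check is still needed for a divisor
    /// known at compile time.
    fn needs_zero_check(divisor: u64) -> bool {
        divisor == 0
    }

    /// Whether `sdiv` still needs the signed-overflow check: only
    /// `i64::MIN / -1` can overflow, so any constant divisor other than -1
    /// lets both checks be dropped entirely.
    fn sdiv_needs_overflow_check(divisor: i64) -> bool {
        divisor == -1
    }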
* Rebasing and review conflicts
@@ -75,135 +75,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Umulhi | Opcode::Smulhi => implemented_in_isle(ctx),
 
-        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => {
-            let ty = ty.unwrap();
-
-            if ty.is_vector() || ty_bits(ty) > 64 {
-                return Err(CodegenError::Unsupported(format!(
-                    "{}: Unsupported type: {:?}",
-                    op, ty
-                )));
-            }
-
-            let is_signed = match op {
-                Opcode::Udiv | Opcode::Urem => false,
-                Opcode::Sdiv | Opcode::Srem => true,
-                _ => unreachable!(),
-            };
-            let is_rem = match op {
-                Opcode::Udiv | Opcode::Sdiv => false,
-                Opcode::Urem | Opcode::Srem => true,
-                _ => unreachable!(),
-            };
-            let narrow_mode = if is_signed {
-                NarrowValueMode::SignExtend64
-            } else {
-                NarrowValueMode::ZeroExtend64
-            };
-            // TODO: Add SDiv32 to implement 32-bit directly, rather
-            // than extending the input.
-            let div_op = if is_signed {
-                ALUOp::SDiv64
-            } else {
-                ALUOp::UDiv64
-            };
-
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-            let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
-            // The div instruction does not trap on divide by zero or signed overflow
-            // so checks are inserted below.
-            //
-            // div rd, rn, rm
-            ctx.emit(Inst::AluRRR {
-                alu_op: div_op,
-                rd,
-                rn,
-                rm,
-            });
-
-            if is_rem {
-                // Remainder (rn % rm) is implemented as:
-                //
-                // tmp = rn / rm
-                // rd = rn - (tmp*rm)
-                //
-                // use 'rd' for tmp and you have:
-                //
-                // div rd, rn, rm ; rd = rn / rm
-                // cbnz rm, #8 ; branch over trap
-                // udf ; divide by zero
-                // msub rd, rd, rm, rn ; rd = rn - rd * rm
-
-                // Check for divide by 0.
-                let trap_code = TrapCode::IntegerDivisionByZero;
-                ctx.emit(Inst::TrapIf {
-                    trap_code,
-                    kind: CondBrKind::Zero(rm),
-                });
-
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MSub64,
-                    rd,
-                    rn: rd.to_reg(),
-                    rm,
-                    ra: rn,
-                });
-            } else {
-                if div_op == ALUOp::SDiv64 {
-                    // cbnz rm, #8
-                    // udf ; divide by zero
-                    // cmn rm, 1
-                    // ccmp rn, 1, #nzcv, eq
-                    // b.vc #8
-                    // udf ; signed overflow
-
-                    // Check for divide by 0.
-                    let trap_code = TrapCode::IntegerDivisionByZero;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Zero(rm),
-                    });
-
-                    // Check for signed overflow. The only case is min_value / -1.
-                    // The following checks must be done in 32-bit or 64-bit, depending
-                    // on the input type. Even though the initial div instruction is
-                    // always done in 64-bit currently.
-                    let size = OperandSize::from_ty(ty);
-                    // Check RHS is -1.
-                    ctx.emit(Inst::AluRRImm12 {
-                        alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
-                        rd: writable_zero_reg(),
-                        rn: rm,
-                        imm12: Imm12::maybe_from_u64(1).unwrap(),
-                    });
-                    // Check LHS is min_value, by subtracting 1 and branching if
-                    // there is overflow.
-                    ctx.emit(Inst::CCmpImm {
-                        size,
-                        rn,
-                        imm: UImm5::maybe_from_u8(1).unwrap(),
-                        nzcv: NZCV::new(false, false, false, false),
-                        cond: Cond::Eq,
-                    });
-                    let trap_code = TrapCode::IntegerOverflow;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Cond(Cond::Vs),
-                    });
-                } else {
-                    // cbnz rm, #8
-                    // udf ; divide by zero
-
-                    // Check for divide by 0.
-                    let trap_code = TrapCode::IntegerDivisionByZero;
-                    ctx.emit(Inst::TrapIf {
-                        trap_code,
-                        kind: CondBrKind::Zero(rm),
-                    });
-                }
-            }
-        }
+        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => implemented_in_isle(ctx),
 
         Opcode::Uextend | Opcode::Sextend => {
             let output_ty = ty.unwrap();