x64: Migrate {s,u}{div,rem} to ISLE (#6008)
* x64: Add precise-output tests for div traps
This adds a suite of `*.clif` files which are intended to test the
`avoid_div_traps=true` compilation of the `{s,u}{div,rem}` instructions.
* x64: Remove conditional regalloc in `Div` instruction
Move the 8-bit `Div` logic into a dedicated `Div8` instruction to avoid
having conditionally-used registers with respect to regalloc.
* x64: Migrate non-trapping `udiv`/`urem` to ISLE
* x64: Port checked `udiv` to ISLE
* x64: Migrate urem entirely to ISLE
* x64: Use `test` instead of `cmp` to compare-to-zero
* x64: Port `sdiv` lowering to ISLE
* x64: Port `srem` lowering to ISLE
* Tidy up regalloc behavior and fix tests
* Update docs and winch
* Review comments
* Reword again
* More refactoring test fixes
* More test fixes
This commit is contained in:
@@ -848,138 +848,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
|
||||
.use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
|
||||
}
|
||||
|
||||
/// Lower an integer divide/remainder (`{s,u}{div,rem}`) to x64 vcode.
///
/// * `kind`     - which of the four operations to emit (signed/unsigned, div/rem).
/// * `ty`       - the integer type being divided (I8 up to I64, presumably — TODO confirm).
/// * `dst`      - destination register for the final result.
/// * `dividend` - the numerator.
/// * `divisor`  - the denominator.
///
/// Two strategies are used: when `avoid_div_traps` is set (or for `srem`, which
/// needs the INT_MIN % -1 check regardless), a checked meta-instruction sequence
/// is emitted; otherwise a bare hardware `div`/`idiv` is emitted after setting up
/// the hi:lo dividend pair. In both cases the result is then moved from the
/// fixed quotient/remainder registers into `dst`.
fn emit_div_or_rem(
    &mut self,
    kind: &DivOrRemKind,
    ty: Type,
    dst: WritableGpr,
    dividend: Gpr,
    divisor: Gpr,
) {
    let is_div = kind.is_div();
    let size = OperandSize::from_ty(ty);

    // Temporaries to receive the two outputs of the divide; which one is
    // copied to `dst` is decided at the end based on `is_div`.
    let dst_quotient = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
    let dst_remainder = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();

    // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
    if self.backend.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem {
        // A vcode meta-instruction is used to lower the inline checks, since they embed
        // pc-relative offsets that must not change, thus requiring regalloc to not
        // interfere by introducing spills and reloads.
        //
        // A scratch register is only needed for the 64-bit signed-divide
        // check sequence; other cases pass `None`.
        let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 {
            Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap())
        } else {
            None
        };
        let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        // Zero `dividend_hi` via a self-xor (32-bit xor zeroes the full
        // 64-bit register) before the checked sequence consumes it.
        self.lower_ctx.emit(MInst::AluConstOp {
            op: AluRmiROpcode::Xor,
            size: OperandSize::Size32,
            dst: WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()),
        });
        self.lower_ctx.emit(MInst::checked_div_or_rem_seq(
            kind.clone(),
            size,
            divisor.to_reg(),
            Gpr::new(dividend.to_reg()).unwrap(),
            Gpr::new(dividend_hi.to_reg()).unwrap(),
            WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()),
            WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()),
            tmp,
        ));
    } else {
        // We don't want more than one trap record for a single instruction,
        // so let's not allow the "mem" case (load-op merging) here; force
        // divisor into a register instead.
        let divisor = RegMem::reg(divisor.to_reg());

        let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();

        // Fill in the high parts:
        let dividend_lo = if kind.is_signed() && ty == types::I8 {
            let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
            // 8-bit div takes its dividend in only the `lo` reg.
            self.lower_ctx.emit(MInst::sign_extend_data(
                size,
                Gpr::new(dividend.to_reg()).unwrap(),
                WritableGpr::from_reg(Gpr::new(dividend_lo.to_reg()).unwrap()),
            ));
            // `dividend_hi` is not used by the Div below, so we
            // don't def it here.

            dividend_lo.to_reg()
        } else if kind.is_signed() {
            // 16-bit and higher div takes its operand in hi:lo
            // with half in each (64:64, 32:32 or 16:16).
            self.lower_ctx.emit(MInst::sign_extend_data(
                size,
                Gpr::new(dividend.to_reg()).unwrap(),
                WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()),
            ));

            dividend.to_reg()
        } else if ty == types::I8 {
            // Unsigned 8-bit: zero-extend the dividend into the `lo` reg
            // (byte-to-long movzx); the high half is unused.
            let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
            self.lower_ctx.emit(MInst::movzx_rm_r(
                ExtMode::BL,
                RegMem::reg(dividend.to_reg()),
                dividend_lo,
            ));

            dividend_lo.to_reg()
        } else {
            // zero for unsigned opcodes.
            self.lower_ctx
                .emit(MInst::imm(OperandSize::Size64, 0, dividend_hi));

            dividend.to_reg()
        };

        // Emit the actual idiv.
        self.lower_ctx.emit(MInst::div(
            size,
            kind.is_signed(),
            divisor,
            Gpr::new(dividend_lo).unwrap(),
            Gpr::new(dividend_hi.to_reg()).unwrap(),
            WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()),
            WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()),
        ));
    }

    // Move the result back into the destination reg.
    if is_div {
        // The quotient is in rax.
        self.lower_ctx.emit(MInst::gen_move(
            dst.to_writable_reg(),
            dst_quotient.to_reg(),
            ty,
        ));
    } else {
        if size == OperandSize::Size8 {
            let tmp = self.temp_writable_reg(ty);
            // The remainder is in AH. Right-shift by 8 bits then move from rax.
            self.lower_ctx.emit(MInst::shift_r(
                OperandSize::Size64,
                ShiftKind::ShiftRightLogical,
                Imm8Gpr::new(Imm8Reg::Imm8 { imm: 8 }).unwrap(),
                dst_quotient.to_reg(),
                tmp,
            ));
            self.lower_ctx
                .emit(MInst::gen_move(dst.to_writable_reg(), tmp.to_reg(), ty));
        } else {
            // The remainder is in rdx.
            self.lower_ctx.emit(MInst::gen_move(
                dst.to_writable_reg(),
                dst_remainder.to_reg(),
                ty,
            ));
        }
    }
}
|
||||
|
||||
fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
|
||||
match XmmMemAligned::new(arg.clone().into()) {
|
||||
Some(aligned) => aligned,
|
||||
|
||||
Reference in New Issue
Block a user