x64: Migrate {s,u}{div,rem} to ISLE (#6008)

* x64: Add precise-output tests for div traps

This adds a suite of `*.clif` files intended to test compilation of the
`{s,u}{div,rem}` instructions under `avoid_div_traps=true`.
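As an illustration, each test is a small CLIF function compiled with the flag
set, along the lines of the sketch below (the exact functions and generated
expected-output comments in the PR's files may differ):

```clif
test compile precise-output
set avoid_div_traps=true
target x86_64

function %sdiv_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = sdiv v0, v1
    return v2
}
```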

* x64: Remove conditional regalloc in `Div` instruction

Move the 8-bit `Div` logic into a dedicated `Div8` instruction so that,
from regalloc's point of view, no register operand is only conditionally
used or defined.
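A minimal, self-contained sketch of the idea (type and field names here are
illustrative stand-ins, not the actual Cranelift definitions): with a
separate 8-bit instruction, every register an instruction mentions is
unconditionally used or defined.

```rust
// Stand-ins for the real operand types; illustrative only.
struct Gpr;
struct WritableGpr;

enum Inst {
    // 16/32/64-bit divide: dividend in DX:AX (widened appropriately),
    // quotient written to AX, remainder written to DX. Both outputs are
    // always defined.
    Div {
        divisor: Gpr,
        dividend_lo: Gpr,           // fixed to RAX
        dividend_hi: Gpr,           // fixed to RDX
        dst_quotient: WritableGpr,  // fixed to RAX
        dst_remainder: WritableGpr, // fixed to RDX
    },
    // 8-bit divide: dividend in AX, quotient in AL, remainder in AH.
    // RDX is never involved, so no conditionally-used register remains.
    Div8 {
        divisor: Gpr,
        dividend: Gpr,    // fixed to RAX
        dst: WritableGpr, // fixed to RAX (AL = quotient, AH = remainder)
    },
}
```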

* x64: Migrate non-trapping `udiv`/`urem` to ISLE

* x64: Port checked `udiv` to ISLE

* x64: Migrate `urem` entirely to ISLE

* x64: Use `test` instead of `cmp` for compares against zero (`test r, r`
  sets the same flags as `cmp r, 0` with a shorter encoding)

* x64: Port `sdiv` lowering to ISLE

* x64: Port `srem` lowering to ISLE

* Tidy up regalloc behavior and fix tests

* Update docs and winch

* Review comments

* Reword again

* More refactoring test fixes

* More test fixes
commit 5c1b468648
parent 188f712025
Author: Alex Crichton
Date: 2023-03-13 20:44:06 -05:00
Committed by: GitHub

52 changed files with 2178 additions and 835 deletions

@@ -848,138 +848,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
            .use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
    }

    fn emit_div_or_rem(
        &mut self,
        kind: &DivOrRemKind,
        ty: Type,
        dst: WritableGpr,
        dividend: Gpr,
        divisor: Gpr,
    ) {
        let is_div = kind.is_div();
        let size = OperandSize::from_ty(ty);

        let dst_quotient = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let dst_remainder = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();

        // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
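        // (On x86-64, `idiv` raises #DE not only for a zero divisor but also
        // when the quotient overflows, as in INT_MIN / -1 -- even though the
        // remainder in that case is well-defined as 0. So `srem` needs an
        // explicit check even when `avoid_div_traps` is off.)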
        if self.backend.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem {
            // A vcode meta-instruction is used to lower the inline checks, since they embed
            // pc-relative offsets that must not change, thus requiring regalloc to not
            // interfere by introducing spills and reloads.
            let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 {
                Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap())
            } else {
                None
            };
            let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
            self.lower_ctx.emit(MInst::AluConstOp {
                op: AluRmiROpcode::Xor,
                size: OperandSize::Size32,
                dst: WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()),
            });
            self.lower_ctx.emit(MInst::checked_div_or_rem_seq(
                kind.clone(),
                size,
                divisor.to_reg(),
                Gpr::new(dividend.to_reg()).unwrap(),
                Gpr::new(dividend_hi.to_reg()).unwrap(),
                WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()),
                WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()),
                tmp,
            ));
        } else {
            // We don't want more than one trap record for a single instruction,
            // so let's not allow the "mem" case (load-op merging) here; force
            // divisor into a register instead.
            let divisor = RegMem::reg(divisor.to_reg());

            let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();

            // Fill in the high parts:
            let dividend_lo = if kind.is_signed() && ty == types::I8 {
                let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
                // 8-bit div takes its dividend in only the `lo` reg.
                self.lower_ctx.emit(MInst::sign_extend_data(
                    size,
                    Gpr::new(dividend.to_reg()).unwrap(),
                    WritableGpr::from_reg(Gpr::new(dividend_lo.to_reg()).unwrap()),
                ));
                // `dividend_hi` is not used by the Div below, so we
                // don't def it here.
                dividend_lo.to_reg()
            } else if kind.is_signed() {
                // 16-bit and higher div takes its operand in hi:lo
                // with half in each (64:64, 32:32 or 16:16).
                self.lower_ctx.emit(MInst::sign_extend_data(
                    size,
                    Gpr::new(dividend.to_reg()).unwrap(),
                    WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()),
                ));
                dividend.to_reg()
            } else if ty == types::I8 {
                let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
                self.lower_ctx.emit(MInst::movzx_rm_r(
                    ExtMode::BL,
                    RegMem::reg(dividend.to_reg()),
                    dividend_lo,
                ));
                dividend_lo.to_reg()
            } else {
                // zero for unsigned opcodes.
                self.lower_ctx
                    .emit(MInst::imm(OperandSize::Size64, 0, dividend_hi));
                dividend.to_reg()
            };

            // Emit the actual idiv.
            self.lower_ctx.emit(MInst::div(
                size,
                kind.is_signed(),
                divisor,
                Gpr::new(dividend_lo).unwrap(),
                Gpr::new(dividend_hi.to_reg()).unwrap(),
                WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()),
                WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()),
            ));
        }

        // Move the result back into the destination reg.
        if is_div {
            // The quotient is in rax.
            self.lower_ctx.emit(MInst::gen_move(
                dst.to_writable_reg(),
                dst_quotient.to_reg(),
                ty,
            ));
        } else {
            if size == OperandSize::Size8 {
                let tmp = self.temp_writable_reg(ty);
                // The remainder is in AH. Right-shift by 8 bits then move from rax.
                self.lower_ctx.emit(MInst::shift_r(
                    OperandSize::Size64,
                    ShiftKind::ShiftRightLogical,
                    Imm8Gpr::new(Imm8Reg::Imm8 { imm: 8 }).unwrap(),
                    dst_quotient.to_reg(),
                    tmp,
                ));
                self.lower_ctx
                    .emit(MInst::gen_move(dst.to_writable_reg(), tmp.to_reg(), ty));
            } else {
                // The remainder is in rdx.
                self.lower_ctx.emit(MInst::gen_move(
                    dst.to_writable_reg(),
                    dst_remainder.to_reg(),
                    ty,
                ));
            }
        }
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,