machinst x64: use xor/xorps/xorpd to generate zero constants;
This commit is contained in:
committed by
Andrew Brown
parent
9a9b5015d0
commit
e108f14620
@@ -210,6 +210,13 @@ impl RegMemImm {
|
||||
Self::Imm { .. } => {}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn to_reg(&self) -> Option<Reg> {
|
||||
match self {
|
||||
Self::Reg { reg } => Some(*reg),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for RegMemImm {
|
||||
@@ -255,6 +262,12 @@ impl RegMem {
|
||||
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
|
||||
}
|
||||
}
|
||||
pub(crate) fn to_reg(&self) -> Option<Reg> {
|
||||
match self {
|
||||
RegMem::Reg { reg } => Some(*reg),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShowWithRRU for RegMem {
|
||||
@@ -271,7 +284,7 @@ impl ShowWithRRU for RegMem {
|
||||
}
|
||||
|
||||
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
|
||||
#[derive(Clone, PartialEq)]
|
||||
#[derive(Copy, Clone, PartialEq)]
|
||||
pub enum AluRmiROpcode {
|
||||
Add,
|
||||
Sub,
|
||||
|
||||
@@ -2044,8 +2044,7 @@ pub(crate) fn emit(
|
||||
//
|
||||
// ;; positive inputs get saturated to INT_MAX; negative ones to INT_MIN, which is
|
||||
// ;; already in %dst.
|
||||
// mov 0, %tmp_gpr
|
||||
// movd/movq %tmp_gpr, %tmp_xmm
|
||||
// xorpd %tmp_xmm, %tmp_xmm
|
||||
// cmpss/cmpsd %src, %tmp_xmm
|
||||
// jnb done
|
||||
// mov/movaps $INT_MAX, %dst
|
||||
@@ -2069,8 +2068,7 @@ pub(crate) fn emit(
|
||||
//
|
||||
// ;; if positive, it was a real overflow
|
||||
// check_positive:
|
||||
// mov 0, %tmp_gpr
|
||||
// movd/movq %tmp_gpr, %tmp_xmm
|
||||
// xorpd %tmp_xmm, %tmp_xmm
|
||||
// cmpss/cmpsd %src, %tmp_xmm
|
||||
// jnb done
|
||||
// ud2 trap IntegerOverflow
|
||||
@@ -2120,11 +2118,10 @@ pub(crate) fn emit(
|
||||
sink.bind_label(not_nan);
|
||||
|
||||
// If the input was positive, saturate to INT_MAX.
|
||||
// TODO use xorps/xorpd here
|
||||
let inst = Inst::imm_r(false, 0, *tmp_gpr); // rely on sign-extension to get 0 on 64-bits
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
// Zero out tmp_xmm.
|
||||
let inst =
|
||||
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
|
||||
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
|
||||
@@ -2194,12 +2191,9 @@ pub(crate) fn emit(
|
||||
|
||||
sink.bind_label(check_positive);
|
||||
|
||||
// TODO use xorpd
|
||||
let inst = Inst::imm_r(false, 0, *tmp_gpr);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
// Zero out the tmp_xmm register.
|
||||
let inst =
|
||||
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
|
||||
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
|
||||
inst.emit(sink, flags, state);
|
||||
|
||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
|
||||
|
||||
@@ -939,6 +939,40 @@ impl Inst {
|
||||
}
|
||||
}
|
||||
|
||||
// Inst helpers.
|
||||
|
||||
impl Inst {
|
||||
/// In certain cases, instructions of this format can act as a definition of an XMM register,
|
||||
/// producing a value that is independent of its initial value.
|
||||
///
|
||||
/// For example, a vector equality comparison (`cmppd` or `cmpps`) that compares a register to
|
||||
/// itself will generate all ones as a result, regardless of its value. From the register
|
||||
/// allocator's point of view, we should (i) record the first register, which is normally a
|
||||
/// mod, as a def instead; and (ii) not record the second register as a use, because it is the
|
||||
/// same as the first register (already handled).
|
||||
fn produces_const(&self) -> bool {
|
||||
match self {
|
||||
Self::Alu_RMI_R { op, src, dst, .. } => {
|
||||
src.to_reg() == Some(dst.to_reg())
|
||||
&& (*op == AluRmiROpcode::Xor || *op == AluRmiROpcode::Sub)
|
||||
}
|
||||
|
||||
Self::XMM_RM_R { op, src, dst, .. } => {
|
||||
src.to_reg() == Some(dst.to_reg())
|
||||
&& (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd)
|
||||
}
|
||||
|
||||
Self::XmmRmRImm { op, src, dst, imm } => {
|
||||
src.to_reg() == Some(dst.to_reg())
|
||||
&& (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
||||
&& *imm == FcmpImm::Equal.encode()
|
||||
}
|
||||
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions: printing
|
||||
|
||||
@@ -1433,9 +1467,14 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
// sets.
|
||||
match inst {
|
||||
Inst::Alu_RMI_R { src, dst, .. } => {
|
||||
if inst.produces_const() {
|
||||
// No need to account for src, since src == dst.
|
||||
collector.add_def(*dst);
|
||||
} else {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
}
|
||||
Inst::Div { divisor, .. } => {
|
||||
collector.add_mod(Writable::from_reg(regs::rax()));
|
||||
collector.add_mod(Writable::from_reg(regs::rdx()));
|
||||
@@ -1466,26 +1505,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
Inst::XMM_RM_R { src, dst, .. } => {
|
||||
if inst.produces_const() {
|
||||
// No need to account for src, since src == dst.
|
||||
collector.add_def(*dst);
|
||||
} else {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_mod(*dst);
|
||||
}
|
||||
Inst::XmmRmRImm { src, dst, op, imm } => {
|
||||
// In certain cases, instructions of this format can act as a definition of an XMM
|
||||
// register, producing a value that is independent of its initial value. For example,
|
||||
// a vector equality comparison (`cmppd` or `cmpps`) that compares a register to itself
|
||||
// will generate all ones as a result, regardless of its value. From the register
|
||||
// allocator's point of view, we should (i) record the first register, which is normally
|
||||
// a mod, as a def instead; and (ii) not record the second register as a use, because
|
||||
// it is the same as the first register (already handled). TODO Re-factored in #2071.
|
||||
let is_def = if let RegMem::Reg { reg } = src {
|
||||
(*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
||||
&& *imm == FcmpImm::Equal.encode()
|
||||
&& *reg == dst.to_reg()
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if is_def {
|
||||
}
|
||||
Inst::XmmRmRImm { src, dst, .. } => {
|
||||
if inst.produces_const() {
|
||||
// No need to account for src, since src == dst.
|
||||
collector.add_def(*dst);
|
||||
} else {
|
||||
src.get_regs_as_uses(collector);
|
||||
@@ -1694,6 +1724,17 @@ impl RegMemImm {
|
||||
RegMemImm::Imm { .. } => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
|
||||
match self {
|
||||
Self::Reg { reg } => {
|
||||
let mut writable_src = Writable::from_reg(*reg);
|
||||
map_def(mapper, &mut writable_src);
|
||||
*self = Self::reg(writable_src.to_reg());
|
||||
}
|
||||
_ => panic!("unexpected RegMemImm kind in map_src_reg_as_def"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RegMem {
|
||||
@@ -1703,10 +1744,23 @@ impl RegMem {
|
||||
RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
|
||||
match self {
|
||||
Self::Reg { reg } => {
|
||||
let mut writable_src = Writable::from_reg(*reg);
|
||||
map_def(mapper, &mut writable_src);
|
||||
*self = Self::reg(writable_src.to_reg());
|
||||
}
|
||||
_ => panic!("unexpected RegMem kind in map_src_reg_as_def"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
// Note this must be carefully synchronized with x64_get_regs.
|
||||
let produces_const = inst.produces_const();
|
||||
|
||||
match inst {
|
||||
// ** Nop
|
||||
Inst::Alu_RMI_R {
|
||||
@@ -1714,9 +1768,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
if produces_const {
|
||||
src.map_as_def(mapper);
|
||||
map_def(mapper, dst);
|
||||
} else {
|
||||
src.map_uses(mapper);
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
}
|
||||
Inst::Div { divisor, .. } => divisor.map_uses(mapper),
|
||||
Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
|
||||
Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
|
||||
@@ -1742,28 +1801,12 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
Inst::XmmRmRImm {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
ref op,
|
||||
ref imm,
|
||||
..
|
||||
} => {
|
||||
// In certain cases, instructions of this format can convert an XMM register into a
|
||||
// define (e.g. an equality comparison); this extra logic is necessary to inform the
|
||||
// register allocator of a different register usage. TODO Re-factored in #2071.
|
||||
if let RegMem::Reg { reg } = src {
|
||||
if (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
||||
&& *imm == FcmpImm::Equal.encode()
|
||||
&& *reg == dst.to_reg()
|
||||
{
|
||||
let mut writable_src = Writable::from_reg(*reg);
|
||||
map_def(mapper, &mut writable_src);
|
||||
*reg = writable_src.to_reg();
|
||||
if produces_const {
|
||||
src.map_as_def(mapper);
|
||||
map_def(mapper, dst);
|
||||
} else {
|
||||
// Otherwise, we map the instruction as usual.
|
||||
src.map_uses(mapper);
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
} else {
|
||||
// TODO this is duplicated because there seems to be no way to join the `if let` and `if`?
|
||||
src.map_uses(mapper);
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
@@ -1773,9 +1816,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
ref mut dst,
|
||||
..
|
||||
} => {
|
||||
if produces_const {
|
||||
src.map_as_def(mapper);
|
||||
map_def(mapper, dst);
|
||||
} else {
|
||||
src.map_uses(mapper);
|
||||
map_mod(mapper, dst);
|
||||
}
|
||||
}
|
||||
Inst::XmmRmiReg {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
@@ -2097,8 +2145,23 @@ impl MachInst for Inst {
|
||||
) -> SmallVec<[Self; 4]> {
|
||||
let mut ret = SmallVec::new();
|
||||
if ty.is_int() {
|
||||
if value == 0 {
|
||||
ret.push(Inst::alu_rmi_r(
|
||||
ty == I64,
|
||||
AluRmiROpcode::Xor,
|
||||
RegMemImm::reg(to_reg.to_reg()),
|
||||
to_reg,
|
||||
));
|
||||
} else {
|
||||
let is_64 = ty == I64 && value > 0x7fffffff;
|
||||
ret.push(Inst::imm_r(is_64, value, to_reg));
|
||||
}
|
||||
} else if value == 0 {
|
||||
ret.push(Inst::xmm_rm_r(
|
||||
SseOpcode::Xorps,
|
||||
RegMem::reg(to_reg.to_reg()),
|
||||
to_reg,
|
||||
));
|
||||
} else {
|
||||
match ty {
|
||||
F32 => {
|
||||
|
||||
@@ -1043,7 +1043,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::F64const => {
|
||||
// TODO use xorpd for 0 and cmpeqpd for all 1s.
|
||||
// TODO use cmpeqpd for all 1s.
|
||||
let value = ctx.get_constant(insn).unwrap();
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| {
|
||||
@@ -1054,7 +1054,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::F32const => {
|
||||
// TODO use xorps for 0 and cmpeqps for all 1s.
|
||||
// TODO use cmpeqps for all 1s.
|
||||
let value = ctx.get_constant(insn).unwrap();
|
||||
let dst = output_to_reg(ctx, outputs[0]);
|
||||
for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {
|
||||
|
||||
Reference in New Issue
Block a user