machinst x64: use xor/xorpss/xorpd to generate zero constants;
This commit is contained in:
committed by
Andrew Brown
parent
9a9b5015d0
commit
e108f14620
@@ -210,6 +210,13 @@ impl RegMemImm {
|
|||||||
Self::Imm { .. } => {}
|
Self::Imm { .. } => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn to_reg(&self) -> Option<Reg> {
|
||||||
|
match self {
|
||||||
|
Self::Reg { reg } => Some(*reg),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ShowWithRRU for RegMemImm {
|
impl ShowWithRRU for RegMemImm {
|
||||||
@@ -255,6 +262,12 @@ impl RegMem {
|
|||||||
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
|
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pub(crate) fn to_reg(&self) -> Option<Reg> {
|
||||||
|
match self {
|
||||||
|
RegMem::Reg { reg } => Some(*reg),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ShowWithRRU for RegMem {
|
impl ShowWithRRU for RegMem {
|
||||||
@@ -271,7 +284,7 @@ impl ShowWithRRU for RegMem {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
|
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
|
||||||
#[derive(Clone, PartialEq)]
|
#[derive(Copy, Clone, PartialEq)]
|
||||||
pub enum AluRmiROpcode {
|
pub enum AluRmiROpcode {
|
||||||
Add,
|
Add,
|
||||||
Sub,
|
Sub,
|
||||||
|
|||||||
@@ -2044,8 +2044,7 @@ pub(crate) fn emit(
|
|||||||
//
|
//
|
||||||
// ;; positive inputs get saturated to INT_MAX; negative ones to INT_MIN, which is
|
// ;; positive inputs get saturated to INT_MAX; negative ones to INT_MIN, which is
|
||||||
// ;; already in %dst.
|
// ;; already in %dst.
|
||||||
// mov 0, %tmp_gpr
|
// xorpd %tmp_xmm, %tmp_xmm
|
||||||
// movd/movq %tmp_gpr, %tmp_xmm
|
|
||||||
// cmpss/cmpsd %src, %tmp_xmm
|
// cmpss/cmpsd %src, %tmp_xmm
|
||||||
// jnb done
|
// jnb done
|
||||||
// mov/movaps $INT_MAX, %dst
|
// mov/movaps $INT_MAX, %dst
|
||||||
@@ -2069,8 +2068,7 @@ pub(crate) fn emit(
|
|||||||
//
|
//
|
||||||
// ;; if positive, it was a real overflow
|
// ;; if positive, it was a real overflow
|
||||||
// check_positive:
|
// check_positive:
|
||||||
// mov 0, %tmp_gpr
|
// xorpd %tmp_xmm, %tmp_xmm
|
||||||
// movd/movq %tmp_gpr, %tmp_xmm
|
|
||||||
// cmpss/cmpsd %src, %tmp_xmm
|
// cmpss/cmpsd %src, %tmp_xmm
|
||||||
// jnb done
|
// jnb done
|
||||||
// ud2 trap IntegerOverflow
|
// ud2 trap IntegerOverflow
|
||||||
@@ -2120,11 +2118,10 @@ pub(crate) fn emit(
|
|||||||
sink.bind_label(not_nan);
|
sink.bind_label(not_nan);
|
||||||
|
|
||||||
// If the input was positive, saturate to INT_MAX.
|
// If the input was positive, saturate to INT_MAX.
|
||||||
// TODO use xorps/xorpd here
|
|
||||||
let inst = Inst::imm_r(false, 0, *tmp_gpr); // rely on sign-extension to get 0 on 64-bits
|
// Zero out tmp_xmm.
|
||||||
inst.emit(sink, flags, state);
|
|
||||||
let inst =
|
let inst =
|
||||||
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
|
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
|
||||||
inst.emit(sink, flags, state);
|
inst.emit(sink, flags, state);
|
||||||
|
|
||||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
|
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
|
||||||
@@ -2194,12 +2191,9 @@ pub(crate) fn emit(
|
|||||||
|
|
||||||
sink.bind_label(check_positive);
|
sink.bind_label(check_positive);
|
||||||
|
|
||||||
// TODO use xorpd
|
// Zero out the tmp_xmm register.
|
||||||
let inst = Inst::imm_r(false, 0, *tmp_gpr);
|
|
||||||
inst.emit(sink, flags, state);
|
|
||||||
|
|
||||||
let inst =
|
let inst =
|
||||||
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
|
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
|
||||||
inst.emit(sink, flags, state);
|
inst.emit(sink, flags, state);
|
||||||
|
|
||||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
|
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
|
||||||
|
|||||||
@@ -939,6 +939,40 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Inst helpers.
|
||||||
|
|
||||||
|
impl Inst {
|
||||||
|
/// In certain cases, instructions of this format can act as a definition of an XMM register,
|
||||||
|
/// producing a value that is independent of its initial value.
|
||||||
|
///
|
||||||
|
/// For example, a vector equality comparison (`cmppd` or `cmpps`) that compares a register to
|
||||||
|
/// itself will generate all ones as a result, regardless of its value. From the register
|
||||||
|
/// allocator's point of view, we should (i) record the first register, which is normally a
|
||||||
|
/// mod, as a def instead; and (ii) not record the second register as a use, because it is the
|
||||||
|
/// same as the first register (already handled).
|
||||||
|
fn produces_const(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
Self::Alu_RMI_R { op, src, dst, .. } => {
|
||||||
|
src.to_reg() == Some(dst.to_reg())
|
||||||
|
&& (*op == AluRmiROpcode::Xor || *op == AluRmiROpcode::Sub)
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::XMM_RM_R { op, src, dst, .. } => {
|
||||||
|
src.to_reg() == Some(dst.to_reg())
|
||||||
|
&& (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd)
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::XmmRmRImm { op, src, dst, imm } => {
|
||||||
|
src.to_reg() == Some(dst.to_reg())
|
||||||
|
&& (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
||||||
|
&& *imm == FcmpImm::Equal.encode()
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
// Instructions: printing
|
// Instructions: printing
|
||||||
|
|
||||||
@@ -1433,8 +1467,13 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
// sets.
|
// sets.
|
||||||
match inst {
|
match inst {
|
||||||
Inst::Alu_RMI_R { src, dst, .. } => {
|
Inst::Alu_RMI_R { src, dst, .. } => {
|
||||||
src.get_regs_as_uses(collector);
|
if inst.produces_const() {
|
||||||
collector.add_mod(*dst);
|
// No need to account for src, since src == dst.
|
||||||
|
collector.add_def(*dst);
|
||||||
|
} else {
|
||||||
|
src.get_regs_as_uses(collector);
|
||||||
|
collector.add_mod(*dst);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Inst::Div { divisor, .. } => {
|
Inst::Div { divisor, .. } => {
|
||||||
collector.add_mod(Writable::from_reg(regs::rax()));
|
collector.add_mod(Writable::from_reg(regs::rax()));
|
||||||
@@ -1466,26 +1505,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(*dst);
|
collector.add_def(*dst);
|
||||||
}
|
}
|
||||||
Inst::XMM_RM_R { src, dst, .. } => {
|
Inst::XMM_RM_R { src, dst, .. } => {
|
||||||
src.get_regs_as_uses(collector);
|
if inst.produces_const() {
|
||||||
collector.add_mod(*dst);
|
// No need to account for src, since src == dst.
|
||||||
}
|
collector.add_def(*dst);
|
||||||
Inst::XmmRmRImm { src, dst, op, imm } => {
|
|
||||||
// In certain cases, instructions of this format can act as a definition of an XMM
|
|
||||||
// register, producing a value that is independent of its initial value. For example,
|
|
||||||
// a vector equality comparison (`cmppd` or `cmpps`) that compares a register to itself
|
|
||||||
// will generate all ones as a result, regardless of its value. From the register
|
|
||||||
// allocator's point of view, we should (i) record the first register, which is normally
|
|
||||||
// a mod, as a def instread; and (ii) not record the second register as a use, because
|
|
||||||
// it is the same as the first register (already handled). TODO Re-factored in #2071.
|
|
||||||
let is_def = if let RegMem::Reg { reg } = src {
|
|
||||||
(*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
|
||||||
&& *imm == FcmpImm::Equal.encode()
|
|
||||||
&& *reg == dst.to_reg()
|
|
||||||
} else {
|
} else {
|
||||||
false
|
src.get_regs_as_uses(collector);
|
||||||
};
|
collector.add_mod(*dst);
|
||||||
|
}
|
||||||
if is_def {
|
}
|
||||||
|
Inst::XmmRmRImm { src, dst, .. } => {
|
||||||
|
if inst.produces_const() {
|
||||||
|
// No need to account for src, since src == dst.
|
||||||
collector.add_def(*dst);
|
collector.add_def(*dst);
|
||||||
} else {
|
} else {
|
||||||
src.get_regs_as_uses(collector);
|
src.get_regs_as_uses(collector);
|
||||||
@@ -1694,6 +1724,17 @@ impl RegMemImm {
|
|||||||
RegMemImm::Imm { .. } => {}
|
RegMemImm::Imm { .. } => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
|
||||||
|
match self {
|
||||||
|
Self::Reg { reg } => {
|
||||||
|
let mut writable_src = Writable::from_reg(*reg);
|
||||||
|
map_def(mapper, &mut writable_src);
|
||||||
|
*self = Self::reg(writable_src.to_reg());
|
||||||
|
}
|
||||||
|
_ => panic!("unexpected RegMemImm kind in map_src_reg_as_def"),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RegMem {
|
impl RegMem {
|
||||||
@@ -1703,10 +1744,23 @@ impl RegMem {
|
|||||||
RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
|
RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
|
||||||
|
match self {
|
||||||
|
Self::Reg { reg } => {
|
||||||
|
let mut writable_src = Writable::from_reg(*reg);
|
||||||
|
map_def(mapper, &mut writable_src);
|
||||||
|
*self = Self::reg(writable_src.to_reg());
|
||||||
|
}
|
||||||
|
_ => panic!("unexpected RegMem kind in map_src_reg_as_def"),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||||
// Note this must be carefully synchronized with x64_get_regs.
|
// Note this must be carefully synchronized with x64_get_regs.
|
||||||
|
let produces_const = inst.produces_const();
|
||||||
|
|
||||||
match inst {
|
match inst {
|
||||||
// ** Nop
|
// ** Nop
|
||||||
Inst::Alu_RMI_R {
|
Inst::Alu_RMI_R {
|
||||||
@@ -1714,8 +1768,13 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
ref mut dst,
|
ref mut dst,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
src.map_uses(mapper);
|
if produces_const {
|
||||||
map_mod(mapper, dst);
|
src.map_as_def(mapper);
|
||||||
|
map_def(mapper, dst);
|
||||||
|
} else {
|
||||||
|
src.map_uses(mapper);
|
||||||
|
map_mod(mapper, dst);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Inst::Div { divisor, .. } => divisor.map_uses(mapper),
|
Inst::Div { divisor, .. } => divisor.map_uses(mapper),
|
||||||
Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
|
Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
|
||||||
@@ -1742,28 +1801,12 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
Inst::XmmRmRImm {
|
Inst::XmmRmRImm {
|
||||||
ref mut src,
|
ref mut src,
|
||||||
ref mut dst,
|
ref mut dst,
|
||||||
ref op,
|
..
|
||||||
ref imm,
|
|
||||||
} => {
|
} => {
|
||||||
// In certain cases, instructions of this format can convert an XMM register into a
|
if produces_const {
|
||||||
// define (e.g. an equality comparison); this extra logic is necessary to inform the
|
src.map_as_def(mapper);
|
||||||
// registry allocator of a different register usage. TODO Re-factored in #2071.
|
map_def(mapper, dst);
|
||||||
if let RegMem::Reg { reg } = src {
|
|
||||||
if (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
|
||||||
&& *imm == FcmpImm::Equal.encode()
|
|
||||||
&& *reg == dst.to_reg()
|
|
||||||
{
|
|
||||||
let mut writable_src = Writable::from_reg(*reg);
|
|
||||||
map_def(mapper, &mut writable_src);
|
|
||||||
*reg = writable_src.to_reg();
|
|
||||||
map_def(mapper, dst);
|
|
||||||
} else {
|
|
||||||
// Otherwise, we map the instruction as usual.
|
|
||||||
src.map_uses(mapper);
|
|
||||||
map_mod(mapper, dst);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// TODO this is duplicated because there seems to be no way to join the `if let` and `if`?
|
|
||||||
src.map_uses(mapper);
|
src.map_uses(mapper);
|
||||||
map_mod(mapper, dst);
|
map_mod(mapper, dst);
|
||||||
}
|
}
|
||||||
@@ -1773,8 +1816,13 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
ref mut dst,
|
ref mut dst,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
src.map_uses(mapper);
|
if produces_const {
|
||||||
map_mod(mapper, dst);
|
src.map_as_def(mapper);
|
||||||
|
map_def(mapper, dst);
|
||||||
|
} else {
|
||||||
|
src.map_uses(mapper);
|
||||||
|
map_mod(mapper, dst);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Inst::XmmRmiReg {
|
Inst::XmmRmiReg {
|
||||||
ref mut src,
|
ref mut src,
|
||||||
@@ -2097,8 +2145,23 @@ impl MachInst for Inst {
|
|||||||
) -> SmallVec<[Self; 4]> {
|
) -> SmallVec<[Self; 4]> {
|
||||||
let mut ret = SmallVec::new();
|
let mut ret = SmallVec::new();
|
||||||
if ty.is_int() {
|
if ty.is_int() {
|
||||||
let is_64 = ty == I64 && value > 0x7fffffff;
|
if value == 0 {
|
||||||
ret.push(Inst::imm_r(is_64, value, to_reg));
|
ret.push(Inst::alu_rmi_r(
|
||||||
|
ty == I64,
|
||||||
|
AluRmiROpcode::Xor,
|
||||||
|
RegMemImm::reg(to_reg.to_reg()),
|
||||||
|
to_reg,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
let is_64 = ty == I64 && value > 0x7fffffff;
|
||||||
|
ret.push(Inst::imm_r(is_64, value, to_reg));
|
||||||
|
}
|
||||||
|
} else if value == 0 {
|
||||||
|
ret.push(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Xorps,
|
||||||
|
RegMem::reg(to_reg.to_reg()),
|
||||||
|
to_reg,
|
||||||
|
));
|
||||||
} else {
|
} else {
|
||||||
match ty {
|
match ty {
|
||||||
F32 => {
|
F32 => {
|
||||||
|
|||||||
@@ -1043,7 +1043,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::F64const => {
|
Opcode::F64const => {
|
||||||
// TODO use xorpd for 0 and cmpeqpd for all 1s.
|
// TODO use cmpeqpd for all 1s.
|
||||||
let value = ctx.get_constant(insn).unwrap();
|
let value = ctx.get_constant(insn).unwrap();
|
||||||
let dst = output_to_reg(ctx, outputs[0]);
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| {
|
for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| {
|
||||||
@@ -1054,7 +1054,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::F32const => {
|
Opcode::F32const => {
|
||||||
// TODO use xorps for 0 and cmpeqps for all 1s.
|
// TODO use cmpeqps for all 1s.
|
||||||
let value = ctx.get_constant(insn).unwrap();
|
let value = ctx.get_constant(insn).unwrap();
|
||||||
let dst = output_to_reg(ctx, outputs[0]);
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {
|
for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {
|
||||||
|
|||||||
Reference in New Issue
Block a user