machinst x64: add packed FP comparisons
Re-orders the SseOpcode variants alphabetically.
This commit is contained in:
@@ -12,6 +12,7 @@ use super::{
|
|||||||
regs::{self, show_ireg_sized},
|
regs::{self, show_ireg_sized},
|
||||||
EmitState,
|
EmitState,
|
||||||
};
|
};
|
||||||
|
use core::fmt::Debug;
|
||||||
|
|
||||||
/// A possible addressing mode (amode) that can be used in instructions.
|
/// A possible addressing mode (amode) that can be used in instructions.
|
||||||
/// These denote a 64-bit value only.
|
/// These denote a 64-bit value only.
|
||||||
@@ -343,6 +344,8 @@ pub enum SseOpcode {
|
|||||||
Andnpd,
|
Andnpd,
|
||||||
Comiss,
|
Comiss,
|
||||||
Comisd,
|
Comisd,
|
||||||
|
Cmpps,
|
||||||
|
Cmppd,
|
||||||
Cmpss,
|
Cmpss,
|
||||||
Cmpsd,
|
Cmpsd,
|
||||||
Cvtsd2ss,
|
Cvtsd2ss,
|
||||||
@@ -407,6 +410,9 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Addss
|
| SseOpcode::Addss
|
||||||
| SseOpcode::Andps
|
| SseOpcode::Andps
|
||||||
| SseOpcode::Andnps
|
| SseOpcode::Andnps
|
||||||
|
| SseOpcode::Comiss
|
||||||
|
| SseOpcode::Cmpps
|
||||||
|
| SseOpcode::Cmpss
|
||||||
| SseOpcode::Cvtsi2ss
|
| SseOpcode::Cvtsi2ss
|
||||||
| SseOpcode::Cvtss2si
|
| SseOpcode::Cvtss2si
|
||||||
| SseOpcode::Cvttss2si
|
| SseOpcode::Cvttss2si
|
||||||
@@ -429,14 +435,15 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Subps
|
| SseOpcode::Subps
|
||||||
| SseOpcode::Subss
|
| SseOpcode::Subss
|
||||||
| SseOpcode::Ucomiss
|
| SseOpcode::Ucomiss
|
||||||
| SseOpcode::Comiss
|
|
||||||
| SseOpcode::Cmpss
|
|
||||||
| SseOpcode::Xorps => SSE,
|
| SseOpcode::Xorps => SSE,
|
||||||
|
|
||||||
SseOpcode::Addpd
|
SseOpcode::Addpd
|
||||||
| SseOpcode::Addsd
|
| SseOpcode::Addsd
|
||||||
| SseOpcode::Andpd
|
| SseOpcode::Andpd
|
||||||
| SseOpcode::Andnpd
|
| SseOpcode::Andnpd
|
||||||
|
| SseOpcode::Cmppd
|
||||||
|
| SseOpcode::Cmpsd
|
||||||
|
| SseOpcode::Comisd
|
||||||
| SseOpcode::Cvtsd2ss
|
| SseOpcode::Cvtsd2ss
|
||||||
| SseOpcode::Cvtsd2si
|
| SseOpcode::Cvtsd2si
|
||||||
| SseOpcode::Cvtsi2sd
|
| SseOpcode::Cvtsi2sd
|
||||||
@@ -461,8 +468,6 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Subpd
|
| SseOpcode::Subpd
|
||||||
| SseOpcode::Subsd
|
| SseOpcode::Subsd
|
||||||
| SseOpcode::Ucomisd
|
| SseOpcode::Ucomisd
|
||||||
| SseOpcode::Comisd
|
|
||||||
| SseOpcode::Cmpsd
|
|
||||||
| SseOpcode::Xorpd => SSE2,
|
| SseOpcode::Xorpd => SSE2,
|
||||||
|
|
||||||
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
|
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
|
||||||
@@ -489,6 +494,10 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Andps => "andps",
|
SseOpcode::Andps => "andps",
|
||||||
SseOpcode::Andnps => "andnps",
|
SseOpcode::Andnps => "andnps",
|
||||||
SseOpcode::Andnpd => "andnpd",
|
SseOpcode::Andnpd => "andnpd",
|
||||||
|
SseOpcode::Cmpps => "cmpps",
|
||||||
|
SseOpcode::Cmppd => "cmppd",
|
||||||
|
SseOpcode::Cmpss => "cmpss",
|
||||||
|
SseOpcode::Cmpsd => "cmpsd",
|
||||||
SseOpcode::Comiss => "comiss",
|
SseOpcode::Comiss => "comiss",
|
||||||
SseOpcode::Comisd => "comisd",
|
SseOpcode::Comisd => "comisd",
|
||||||
SseOpcode::Cvtsd2ss => "cvtsd2ss",
|
SseOpcode::Cvtsd2ss => "cvtsd2ss",
|
||||||
@@ -503,6 +512,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Divpd => "divpd",
|
SseOpcode::Divpd => "divpd",
|
||||||
SseOpcode::Divss => "divss",
|
SseOpcode::Divss => "divss",
|
||||||
SseOpcode::Divsd => "divsd",
|
SseOpcode::Divsd => "divsd",
|
||||||
|
SseOpcode::Insertps => "insertps",
|
||||||
SseOpcode::Maxps => "maxps",
|
SseOpcode::Maxps => "maxps",
|
||||||
SseOpcode::Maxpd => "maxpd",
|
SseOpcode::Maxpd => "maxpd",
|
||||||
SseOpcode::Maxss => "maxss",
|
SseOpcode::Maxss => "maxss",
|
||||||
@@ -539,9 +549,6 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Subsd => "subsd",
|
SseOpcode::Subsd => "subsd",
|
||||||
SseOpcode::Ucomiss => "ucomiss",
|
SseOpcode::Ucomiss => "ucomiss",
|
||||||
SseOpcode::Ucomisd => "ucomisd",
|
SseOpcode::Ucomisd => "ucomisd",
|
||||||
SseOpcode::Cmpss => "cmpss",
|
|
||||||
SseOpcode::Cmpsd => "cmpsd",
|
|
||||||
SseOpcode::Insertps => "insertps",
|
|
||||||
SseOpcode::Xorps => "xorps",
|
SseOpcode::Xorps => "xorps",
|
||||||
SseOpcode::Xorpd => "xorpd",
|
SseOpcode::Xorpd => "xorpd",
|
||||||
};
|
};
|
||||||
@@ -814,6 +821,42 @@ impl fmt::Display for CC {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Encodes the ways that floats can be compared by instructions such as `cmpps`, which take the
/// comparison predicate as an immediate operand. This distinguishes them from other float
/// comparisons (e.g. `ucomiss`), which report their result through EFLAGS instead.
///
/// Each variant's discriminant is the immediate value produced by [`FcmpImm::encode`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum FcmpImm {
    Equal = 0x00,
    LessThan = 0x01,
    LessThanOrEqual = 0x02,
    Unordered = 0x03,
    NotEqual = 0x04,
    UnorderedOrGreaterThanOrEqual = 0x05,
    UnorderedOrGreaterThan = 0x06,
    Ordered = 0x07,
}

impl FcmpImm {
    /// Returns the immediate byte for this predicate, i.e. the variant's discriminant.
    pub(crate) fn encode(self) -> u8 {
        self as u8
    }
}
|
||||||
|
|
||||||
|
impl From<FloatCC> for FcmpImm {
|
||||||
|
fn from(cond: FloatCC) -> Self {
|
||||||
|
match cond {
|
||||||
|
FloatCC::Equal => FcmpImm::Equal,
|
||||||
|
FloatCC::LessThan => FcmpImm::LessThan,
|
||||||
|
FloatCC::LessThanOrEqual => FcmpImm::LessThanOrEqual,
|
||||||
|
FloatCC::Unordered => FcmpImm::Unordered,
|
||||||
|
FloatCC::NotEqual => FcmpImm::NotEqual,
|
||||||
|
FloatCC::UnorderedOrGreaterThanOrEqual => FcmpImm::UnorderedOrGreaterThanOrEqual,
|
||||||
|
FloatCC::UnorderedOrGreaterThan => FcmpImm::UnorderedOrGreaterThan,
|
||||||
|
FloatCC::Ordered => FcmpImm::Ordered,
|
||||||
|
_ => panic!("unable to create comparison predicate for {}", cond),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
||||||
/// from end of current instruction).
|
/// from end of current instruction).
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
|||||||
@@ -1717,6 +1717,28 @@ pub(crate) fn emit(
|
|||||||
sink.bind_label(done);
|
sink.bind_label(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmRmRImm { op, src, dst, imm } => {
    // All four comparisons share the opcode 0F C2 and differ only in their mandatory prefix:
    // none for `cmpps`, 66 for `cmppd`, F3 for `cmpss` and F2 for `cmpsd`.
    let prefix = match op {
        SseOpcode::Cmpps => LegacyPrefix::None,
        SseOpcode::Cmppd => LegacyPrefix::_66,
        SseOpcode::Cmpss => LegacyPrefix::_F3,
        SseOpcode::Cmpsd => LegacyPrefix::_F2,
        _ => unimplemented!("Opcode {:?} not implemented", op),
    };
    let opcode = 0x0FC2;
    let rex = RexFlags::clear_w();
    match src {
        RegMem::Reg { reg } => {
            emit_std_reg_reg(sink, prefix, opcode, 2, dst.to_reg(), *reg, rex);
        }
        RegMem::Mem { addr } => {
            let addr = &addr.finalize(state);
            emit_std_reg_mem(sink, prefix, opcode, 2, dst.to_reg(), addr, rex);
        }
    }
    // The immediate is the last byte emitted for this instruction.
    sink.put1(*imm)
}
|
||||||
|
|
||||||
Inst::Xmm_Mov_R_M {
|
Inst::Xmm_Mov_R_M {
|
||||||
op,
|
op,
|
||||||
src,
|
src,
|
||||||
|
|||||||
@@ -309,6 +309,14 @@ pub enum Inst {
|
|||||||
dst: Reg,
|
dst: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// A binary XMM instruction with an 8-bit immediate: cmp (ps pd) imm (reg addr) reg
|
||||||
|
XmmRmRImm {
|
||||||
|
op: SseOpcode,
|
||||||
|
src: RegMem,
|
||||||
|
dst: Writable<Reg>,
|
||||||
|
imm: u8,
|
||||||
|
},
|
||||||
|
|
||||||
// =====================================
|
// =====================================
|
||||||
// Control flow instructions.
|
// Control flow instructions.
|
||||||
/// Direct call: call simm32.
|
/// Direct call: call simm32.
|
||||||
@@ -681,6 +689,13 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn xmm_rm_r_imm(op: SseOpcode, src: RegMem, dst: Writable<Reg>, imm: u8) -> Inst {
|
||||||
|
src.assert_regclass_is(RegClass::V128);
|
||||||
|
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
||||||
|
debug_assert!(imm < 8);
|
||||||
|
Inst::XmmRmRImm { op, src, dst, imm }
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn movzx_rm_r(
|
pub(crate) fn movzx_rm_r(
|
||||||
ext_mode: ExtMode,
|
ext_mode: ExtMode,
|
||||||
src: RegMem,
|
src: RegMem,
|
||||||
@@ -1055,6 +1070,14 @@ impl ShowWithRRU for Inst {
|
|||||||
show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
|
show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
|
||||||
),
|
),
|
||||||
|
|
||||||
|
Inst::XmmRmRImm { op, src, dst, imm } => format!(
|
||||||
|
"{} ${}, {}, {}",
|
||||||
|
ljustify(op.to_string()),
|
||||||
|
imm,
|
||||||
|
src.show_rru(mb_rru),
|
||||||
|
dst.show_rru(mb_rru),
|
||||||
|
),
|
||||||
|
|
||||||
Inst::XmmToGpr {
|
Inst::XmmToGpr {
|
||||||
op,
|
op,
|
||||||
src,
|
src,
|
||||||
@@ -1408,6 +1431,29 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
src.get_regs_as_uses(collector);
|
src.get_regs_as_uses(collector);
|
||||||
collector.add_mod(*dst);
|
collector.add_mod(*dst);
|
||||||
}
|
}
|
||||||
|
Inst::XmmRmRImm { src, dst, op, imm } => {
|
||||||
|
// In certain cases, instructions of this format can act as a definition of an XMM
|
||||||
|
// register, producing a value that is independent of its initial value. For example,
|
||||||
|
// a vector equality comparison (`cmppd` or `cmpps`) that compares a register to itself
|
||||||
|
// will generate all ones as a result, regardless of its value. From the register
|
||||||
|
// allocator's point of view, we should (i) record the first register, which is normally
|
||||||
|
// a mod, as a def instead; and (ii) not record the second register as a use, because
|
||||||
|
// it is the same as the first register (already handled). TODO Re-factored in #2071.
|
||||||
|
let is_def = if let RegMem::Reg { reg } = src {
|
||||||
|
(*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
||||||
|
&& *imm == FcmpImm::Equal.encode()
|
||||||
|
&& *reg == dst.to_reg()
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
};
|
||||||
|
|
||||||
|
if is_def {
|
||||||
|
collector.add_def(*dst);
|
||||||
|
} else {
|
||||||
|
src.get_regs_as_uses(collector);
|
||||||
|
collector.add_mod(*dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
|
Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
|
||||||
collector.add_use(*lhs);
|
collector.add_use(*lhs);
|
||||||
collector.add_mod(*rhs_dst);
|
collector.add_mod(*rhs_dst);
|
||||||
@@ -1650,6 +1696,35 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
src.map_uses(mapper);
|
src.map_uses(mapper);
|
||||||
map_def(mapper, dst);
|
map_def(mapper, dst);
|
||||||
}
|
}
|
||||||
|
Inst::XmmRmRImm {
|
||||||
|
ref mut src,
|
||||||
|
ref mut dst,
|
||||||
|
ref op,
|
||||||
|
ref imm,
|
||||||
|
} => {
|
||||||
|
// In certain cases, instructions of this format can convert an XMM register into a
|
||||||
|
// define (e.g. an equality comparison); this extra logic is necessary to inform the
|
||||||
|
// register allocator of a different register usage. TODO Re-factored in #2071.
|
||||||
|
if let RegMem::Reg { reg } = src {
|
||||||
|
if (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
||||||
|
&& *imm == FcmpImm::Equal.encode()
|
||||||
|
&& *reg == dst.to_reg()
|
||||||
|
{
|
||||||
|
let mut writable_src = Writable::from_reg(*reg);
|
||||||
|
map_def(mapper, &mut writable_src);
|
||||||
|
*reg = writable_src.to_reg();
|
||||||
|
map_def(mapper, dst);
|
||||||
|
} else {
|
||||||
|
// Otherwise, we map the instruction as usual.
|
||||||
|
src.map_uses(mapper);
|
||||||
|
map_mod(mapper, dst);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// TODO this is duplicated because there seems to be no way to join the `if let` and `if`?
|
||||||
|
src.map_uses(mapper);
|
||||||
|
map_mod(mapper, dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
Inst::XMM_RM_R {
|
Inst::XMM_RM_R {
|
||||||
ref mut src,
|
ref mut src,
|
||||||
ref mut dst,
|
ref mut dst,
|
||||||
|
|||||||
@@ -1043,7 +1043,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::F64const => {
|
Opcode::F64const => {
|
||||||
// TODO use xorpd for 0
|
// TODO use xorpd for 0 and cmpeqpd for all 1s.
|
||||||
let value = ctx.get_constant(insn).unwrap();
|
let value = ctx.get_constant(insn).unwrap();
|
||||||
let dst = output_to_reg(ctx, outputs[0]);
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| {
|
for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| {
|
||||||
@@ -1054,7 +1054,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::F32const => {
|
Opcode::F32const => {
|
||||||
// TODO use xorps for 0.
|
// TODO use xorps for 0 and cmpeqps for all 1s.
|
||||||
let value = ctx.get_constant(insn).unwrap();
|
let value = ctx.get_constant(insn).unwrap();
|
||||||
let dst = output_to_reg(ctx, outputs[0]);
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {
|
for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {
|
||||||
|
|||||||
Reference in New Issue
Block a user