machinst x64: add packed FP comparisons

Re-orders the SseOpcode variants alphabetically.
2020-07-24 12:46:49 -07:00
parent e3bd8d696b
commit 0398033447
4 changed files with 149 additions and 9 deletions
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -12,6 +12,7 @@ use super::{
    regs::{self, show_ireg_sized},
    EmitState,
 };
+use core::fmt::Debug;

 /// A possible addressing mode (amode) that can be used in instructions.
 /// These denote a 64-bit value only.
@@ -343,6 +344,8 @@ pub enum SseOpcode {
    Andnpd,
    Comiss,
    Comisd,
+    Cmpps,
+    Cmppd,
    Cmpss,
    Cmpsd,
    Cvtsd2ss,
@@ -407,6 +410,9 @@ impl SseOpcode {
            | SseOpcode::Addss
            | SseOpcode::Andps
            | SseOpcode::Andnps
+            | SseOpcode::Comiss
+            | SseOpcode::Cmpps
+            | SseOpcode::Cmpss
            | SseOpcode::Cvtsi2ss
            | SseOpcode::Cvtss2si
            | SseOpcode::Cvttss2si
@@ -429,14 +435,15 @@ impl SseOpcode {
            | SseOpcode::Subps
            | SseOpcode::Subss
            | SseOpcode::Ucomiss
-            | SseOpcode::Comiss
-            | SseOpcode::Cmpss
            | SseOpcode::Xorps => SSE,

            SseOpcode::Addpd
            | SseOpcode::Addsd
            | SseOpcode::Andpd
            | SseOpcode::Andnpd
+            | SseOpcode::Cmppd
+            | SseOpcode::Cmpsd
+            | SseOpcode::Comisd
            | SseOpcode::Cvtsd2ss
            | SseOpcode::Cvtsd2si
            | SseOpcode::Cvtsi2sd
@@ -461,8 +468,6 @@ impl SseOpcode {
            | SseOpcode::Subpd
            | SseOpcode::Subsd
            | SseOpcode::Ucomisd
-            | SseOpcode::Comisd
-            | SseOpcode::Cmpsd
            | SseOpcode::Xorpd => SSE2,

            SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
@@ -489,6 +494,10 @@ impl fmt::Debug for SseOpcode {
            SseOpcode::Andps => "andps",
            SseOpcode::Andnps => "andnps",
            SseOpcode::Andnpd => "andnpd",
+            SseOpcode::Cmpps => "cmpps",
+            SseOpcode::Cmppd => "cmppd",
+            SseOpcode::Cmpss => "cmpss",
+            SseOpcode::Cmpsd => "cmpsd",
            SseOpcode::Comiss => "comiss",
            SseOpcode::Comisd => "comisd",
            SseOpcode::Cvtsd2ss => "cvtsd2ss",
@@ -503,6 +512,7 @@ impl fmt::Debug for SseOpcode {
            SseOpcode::Divpd => "divpd",
            SseOpcode::Divss => "divss",
            SseOpcode::Divsd => "divsd",
+            SseOpcode::Insertps => "insertps",
            SseOpcode::Maxps => "maxps",
            SseOpcode::Maxpd => "maxpd",
            SseOpcode::Maxss => "maxss",
@@ -539,9 +549,6 @@ impl fmt::Debug for SseOpcode {
            SseOpcode::Subsd => "subsd",
            SseOpcode::Ucomiss => "ucomiss",
            SseOpcode::Ucomisd => "ucomisd",
-            SseOpcode::Cmpss => "cmpss",
-            SseOpcode::Cmpsd => "cmpsd",
-            SseOpcode::Insertps => "insertps",
            SseOpcode::Xorps => "xorps",
            SseOpcode::Xorpd => "xorpd",
        };
@@ -814,6 +821,42 @@ impl fmt::Display for CC {
    }
 }

+/// Encode the ways that floats can be compared. This is used by float comparisons such as `cmpps`;
+/// it is distinguished from other float comparisons (e.g. `ucomiss`) in that those use EFLAGS
+/// whereas [FcmpImm] is emitted as the instruction's 8-bit immediate (see [FcmpImm::encode]).
+pub(crate) enum FcmpImm {
+    Equal = 0x00,
+    LessThan = 0x01,
+    LessThanOrEqual = 0x02,
+    Unordered = 0x03,
+    NotEqual = 0x04,
+    UnorderedOrGreaterThanOrEqual = 0x05,
+    UnorderedOrGreaterThan = 0x06,
+    Ordered = 0x07,
+}
+
+impl FcmpImm {
+    pub(crate) fn encode(self) -> u8 {
+        self as u8 // The explicit discriminants above are the raw immediate values.
+    }
+}
+
+impl From<FloatCC> for FcmpImm {
+    fn from(cond: FloatCC) -> Self {
+        match cond {
+            FloatCC::Equal => Self::Equal,
+            FloatCC::LessThan => Self::LessThan,
+            FloatCC::LessThanOrEqual => Self::LessThanOrEqual,
+            FloatCC::Unordered => Self::Unordered,
+            FloatCC::NotEqual => Self::NotEqual,
+            FloatCC::UnorderedOrGreaterThanOrEqual => Self::UnorderedOrGreaterThanOrEqual,
+            FloatCC::UnorderedOrGreaterThan => Self::UnorderedOrGreaterThan,
+            FloatCC::Ordered => Self::Ordered,
+            _ => panic!("unable to create comparison predicate for {}", cond),
+        }
+    }
+}
+
 /// A branch target. Either unresolved (basic-block index) or resolved (offset
 /// from end of current instruction).
 #[derive(Clone, Copy, Debug)]
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1717,6 +1717,28 @@ pub(crate) fn emit(
            sink.bind_label(done);
        }

+        Inst::XmmRmRImm { op, src, dst, imm } => {
+            let prefix = match op {
+                SseOpcode::Cmpps => LegacyPrefix::None, // cmpps: NP 0F C2 /r ib
+                SseOpcode::Cmppd => LegacyPrefix::_66, // cmppd: 66 0F C2 /r ib
+                SseOpcode::Cmpss => LegacyPrefix::_F3, // cmpss: F3 0F C2 /r ib
+                SseOpcode::Cmpsd => LegacyPrefix::_F2, // cmpsd: F2 0F C2 /r ib
+                _ => unimplemented!("Opcode {:?} not implemented", op),
+            };
+            let opcode = 0x0FC2; // Shared by all four compares; only the prefix differs.
+            let rex = RexFlags::clear_w();
+            match src {
+                RegMem::Reg { reg } => {
+                    emit_std_reg_reg(sink, prefix, opcode, 2, dst.to_reg(), *reg, rex);
+                }
+                RegMem::Mem { addr } => {
+                    let addr = &addr.finalize(state);
+                    emit_std_reg_mem(sink, prefix, opcode, 2, dst.to_reg(), addr, rex);
+                }
+            }
+            sink.put1(*imm) // ib: the comparison predicate is emitted last.
+        }
+
        Inst::Xmm_Mov_R_M {
            op,
            src,
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -309,6 +309,14 @@ pub enum Inst {
        dst: Reg,
    },

+    /// A binary XMM instruction with an 8-bit immediate: cmp (ps pd) imm (reg addr) reg
+    XmmRmRImm {
+        op: SseOpcode,
+        src: RegMem,
+        dst: Writable<Reg>,
+        imm: u8,
+    },
+
    // =====================================
    // Control flow instructions.
    /// Direct call: call simm32.
@@ -681,6 +689,13 @@ impl Inst {
        }
    }

+    pub(crate) fn xmm_rm_r_imm(op: SseOpcode, src: RegMem, dst: Writable<Reg>, imm: u8) -> Inst {
+        src.assert_regclass_is(RegClass::V128);
+        debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+        debug_assert!(imm < 8); // `FcmpImm` predicates encode as 0x00..=0x07.
+        Inst::XmmRmRImm { op, src, dst, imm }
+    }
+
    pub(crate) fn movzx_rm_r(
        ext_mode: ExtMode,
        src: RegMem,
@@ -1055,6 +1070,14 @@ impl ShowWithRRU for Inst {
                show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
            ),

+            Inst::XmmRmRImm { op, src, dst, imm } => format!(
+                "{} ${}, {}, {}",
+                ljustify(op.to_string()),
+                imm,
+                src.show_rru(mb_rru),
+                dst.show_rru(mb_rru),
+            ),
+
            Inst::XmmToGpr {
                op,
                src,
@@ -1408,6 +1431,29 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            src.get_regs_as_uses(collector);
            collector.add_mod(*dst);
        }
+        Inst::XmmRmRImm { src, dst, op, imm } => {
+            // In certain cases, instructions of this format can act as a definition of an XMM
+            // register, producing a value that is independent of its initial value. For example,
+            // a vector equality comparison (`cmppd` or `cmpps`) that compares a register to itself
+            // is assumed to generate all ones. NOTE(review): NaN lanes compare unequal to
+            // themselves, so confirm this holds for arbitrary initial contents. For the register
+            // allocator, we should (i) record the first register, normally a mod, as a def; and
+            // (ii) not record the second, identical register as a use. TODO Re-factored in #2071.
+            let is_def = if let RegMem::Reg { reg } = src {
+                (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
+                    && *imm == FcmpImm::Equal.encode()
+                    && *reg == dst.to_reg()
+            } else {
+                false
+            };
+
+            if is_def {
+                collector.add_def(*dst);
+            } else {
+                src.get_regs_as_uses(collector);
+                collector.add_mod(*dst);
+            }
+        }
        Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
            collector.add_use(*lhs);
            collector.add_mod(*rhs_dst);
@@ -1650,6 +1696,35 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            src.map_uses(mapper);
            map_def(mapper, dst);
        }
+        Inst::XmmRmRImm {
+            ref mut src,
+            ref mut dst,
+            ref op,
+            ref imm,
+        } => {
+            // In certain cases, instructions of this format can convert an XMM register into a
+            // define (e.g. an equality comparison); this extra logic is necessary to inform the
+            // register allocator of a different register usage. TODO Re-factored in #2071.
+            if let RegMem::Reg { reg } = src {
+                if (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
+                    && *imm == FcmpImm::Equal.encode()
+                    && *reg == dst.to_reg()
+                {
+                    let mut writable_src = Writable::from_reg(*reg);
+                    map_def(mapper, &mut writable_src);
+                    *reg = writable_src.to_reg();
+                    map_def(mapper, dst);
+                } else {
+                    // Otherwise, we map the instruction as usual.
+                    src.map_uses(mapper);
+                    map_mod(mapper, dst);
+                }
+            } else {
+                // TODO this is duplicated because there seems to be no way to join the `if let` and `if`?
+                src.map_uses(mapper);
+                map_mod(mapper, dst);
+            }
+        }
        Inst::XMM_RM_R {
            ref mut src,
            ref mut dst,
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -1043,7 +1043,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        }

        Opcode::F64const => {
-            // TODO use xorpd for 0
+            // TODO use xorpd for 0 and cmpeqpd for all 1s.
            let value = ctx.get_constant(insn).unwrap();
            let dst = output_to_reg(ctx, outputs[0]);
            for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| {
@@ -1054,7 +1054,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        }

        Opcode::F32const => {
-            // TODO use xorps for 0.
+            // TODO use xorps for 0 and cmpeqps for all 1s.
            let value = ctx.get_constant(insn).unwrap();
            let dst = output_to_reg(ctx, outputs[0]);
            for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {