diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 2351ac8899..c80db41f09 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -12,6 +12,7 @@ use super::{ regs::{self, show_ireg_sized}, EmitState, }; +use core::fmt::Debug; /// A possible addressing mode (amode) that can be used in instructions. /// These denote a 64-bit value only. @@ -343,6 +344,8 @@ pub enum SseOpcode { Andnpd, Comiss, Comisd, + Cmpps, + Cmppd, Cmpss, Cmpsd, Cvtsd2ss, @@ -407,6 +410,9 @@ impl SseOpcode { | SseOpcode::Addss | SseOpcode::Andps | SseOpcode::Andnps + | SseOpcode::Comiss + | SseOpcode::Cmpps + | SseOpcode::Cmpss | SseOpcode::Cvtsi2ss | SseOpcode::Cvtss2si | SseOpcode::Cvttss2si @@ -429,14 +435,15 @@ impl SseOpcode { | SseOpcode::Subps | SseOpcode::Subss | SseOpcode::Ucomiss - | SseOpcode::Comiss - | SseOpcode::Cmpss | SseOpcode::Xorps => SSE, SseOpcode::Addpd | SseOpcode::Addsd | SseOpcode::Andpd | SseOpcode::Andnpd + | SseOpcode::Cmppd + | SseOpcode::Cmpsd + | SseOpcode::Comisd | SseOpcode::Cvtsd2ss | SseOpcode::Cvtsd2si | SseOpcode::Cvtsi2sd @@ -461,8 +468,6 @@ impl SseOpcode { | SseOpcode::Subpd | SseOpcode::Subsd | SseOpcode::Ucomisd - | SseOpcode::Comisd - | SseOpcode::Cmpsd | SseOpcode::Xorpd => SSE2, SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41, @@ -489,6 +494,10 @@ impl fmt::Debug for SseOpcode { SseOpcode::Andps => "andps", SseOpcode::Andnps => "andnps", SseOpcode::Andnpd => "andnpd", + SseOpcode::Cmpps => "cmpps", + SseOpcode::Cmppd => "cmppd", + SseOpcode::Cmpss => "cmpss", + SseOpcode::Cmpsd => "cmpsd", SseOpcode::Comiss => "comiss", SseOpcode::Comisd => "comisd", SseOpcode::Cvtsd2ss => "cvtsd2ss", @@ -503,6 +512,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Divpd => "divpd", SseOpcode::Divss => "divss", SseOpcode::Divsd => "divsd", + SseOpcode::Insertps => "insertps", SseOpcode::Maxps => "maxps", SseOpcode::Maxpd => "maxpd", 
SseOpcode::Maxss => "maxss", @@ -539,9 +549,6 @@ impl fmt::Debug for SseOpcode { SseOpcode::Subsd => "subsd", SseOpcode::Ucomiss => "ucomiss", SseOpcode::Ucomisd => "ucomisd", - SseOpcode::Cmpss => "cmpss", - SseOpcode::Cmpsd => "cmpsd", - SseOpcode::Insertps => "insertps", SseOpcode::Xorps => "xorps", SseOpcode::Xorpd => "xorpd", }; @@ -814,6 +821,42 @@ impl fmt::Display for CC { } } +/// Encode the ways that floats can be compared. This is used in float comparisons such as `cmpps`; +/// it is distinguished from other float comparisons (e.g. `ucomiss`) in that those use EFLAGS +/// whereas [FcmpImm] is used as an immediate. +pub(crate) enum FcmpImm { + Equal = 0x00, + LessThan = 0x01, + LessThanOrEqual = 0x02, + Unordered = 0x03, + NotEqual = 0x04, + UnorderedOrGreaterThanOrEqual = 0x05, + UnorderedOrGreaterThan = 0x06, + Ordered = 0x07, +} + +impl FcmpImm { + pub(crate) fn encode(self) -> u8 { + self as u8 + } +} + +impl From<FloatCC> for FcmpImm { + fn from(cond: FloatCC) -> Self { + match cond { + FloatCC::Equal => FcmpImm::Equal, + FloatCC::LessThan => FcmpImm::LessThan, + FloatCC::LessThanOrEqual => FcmpImm::LessThanOrEqual, + FloatCC::Unordered => FcmpImm::Unordered, + FloatCC::NotEqual => FcmpImm::NotEqual, + FloatCC::UnorderedOrGreaterThanOrEqual => FcmpImm::UnorderedOrGreaterThanOrEqual, + FloatCC::UnorderedOrGreaterThan => FcmpImm::UnorderedOrGreaterThan, + FloatCC::Ordered => FcmpImm::Ordered, + _ => panic!("unable to create comparison predicate for {}", cond), + } + } +} + /// A branch target. Either unresolved (basic-block index) or resolved (offset /// from end of current instruction). 
#[derive(Clone, Copy, Debug)] diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 42c8e9c77e..84075a9335 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1717,6 +1717,28 @@ pub(crate) fn emit( sink.bind_label(done); } + Inst::XmmRmRImm { op, src, dst, imm } => { + let prefix = match op { + SseOpcode::Cmpps => LegacyPrefix::None, + SseOpcode::Cmppd => LegacyPrefix::_66, + SseOpcode::Cmpss => LegacyPrefix::_F3, + SseOpcode::Cmpsd => LegacyPrefix::_F2, + _ => unimplemented!("Opcode {:?} not implemented", op), + }; + let opcode = 0x0FC2; + let rex = RexFlags::clear_w(); + match src { + RegMem::Reg { reg } => { + emit_std_reg_reg(sink, prefix, opcode, 2, dst.to_reg(), *reg, rex); + } + RegMem::Mem { addr } => { + let addr = &addr.finalize(state); + emit_std_reg_mem(sink, prefix, opcode, 2, dst.to_reg(), addr, rex); + } + } + sink.put1(*imm) + } + Inst::Xmm_Mov_R_M { op, src, diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index fd5037d295..818f73c887 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -309,6 +309,14 @@ pub enum Inst { dst: Reg, }, + /// A binary XMM instruction with an 8-bit immediate: cmp (ps pd) imm (reg addr) reg + XmmRmRImm { + op: SseOpcode, + src: RegMem, + dst: Writable<Reg>, + imm: u8, + }, + // ===================================== // Control flow instructions. /// Direct call: call simm32. 
@@ -681,6 +689,13 @@ impl Inst { } } + pub(crate) fn xmm_rm_r_imm(op: SseOpcode, src: RegMem, dst: Writable<Reg>, imm: u8) -> Inst { + src.assert_regclass_is(RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(imm < 8); + Inst::XmmRmRImm { op, src, dst, imm } + } + pub(crate) fn movzx_rm_r( ext_mode: ExtMode, src: RegMem, @@ -1055,6 +1070,14 @@ impl ShowWithRRU for Inst { show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8), ), + Inst::XmmRmRImm { op, src, dst, imm } => format!( + "{} ${}, {}, {}", + ljustify(op.to_string()), + imm, + src.show_rru(mb_rru), + dst.show_rru(mb_rru), + ), + Inst::XmmToGpr { op, src, @@ -1408,6 +1431,29 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } + Inst::XmmRmRImm { src, dst, op, imm } => { + // In certain cases, instructions of this format can act as a definition of an XMM + // register, producing a value that is independent of its initial value. For example, + // a vector equality comparison (`cmppd` or `cmpps`) that compares a register to itself + // will generate all ones as a result, regardless of its value. From the register + // allocator's point of view, we should (i) record the first register, which is normally + // a mod, as a def instead; and (ii) not record the second register as a use, because + // it is the same as the first register (already handled). TODO Re-factored in #2071. + let is_def = if let RegMem::Reg { reg } = src { + (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps) + && *imm == FcmpImm::Equal.encode() + && *reg == dst.to_reg() + } else { + false + }; + + if is_def { + collector.add_def(*dst); + } else { + src.get_regs_as_uses(collector); + collector.add_mod(*dst); + } + } Inst::XmmMinMaxSeq { lhs, rhs_dst, .. 
} => { collector.add_use(*lhs); collector.add_mod(*rhs_dst); @@ -1650,6 +1696,35 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_def(mapper, dst); } + Inst::XmmRmRImm { + ref mut src, + ref mut dst, + ref op, + ref imm, + } => { + // In certain cases, instructions of this format can convert an XMM register into a + // define (e.g. an equality comparison); this extra logic is necessary to inform the + // register allocator of a different register usage. TODO Re-factored in #2071. + if let RegMem::Reg { reg } = src { + if (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps) + && *imm == FcmpImm::Equal.encode() + && *reg == dst.to_reg() + { + let mut writable_src = Writable::from_reg(*reg); + map_def(mapper, &mut writable_src); + *reg = writable_src.to_reg(); + map_def(mapper, dst); + } else { + // Otherwise, we map the instruction as usual. + src.map_uses(mapper); + map_mod(mapper, dst); + } + } else { + // TODO this is duplicated because there seems to be no way to join the `if let` and `if`? + src.map_uses(mapper); + map_mod(mapper, dst); + } + } Inst::XMM_RM_R { ref mut src, ref mut dst, diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 1be2c53d86..6143df392f 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1043,7 +1043,7 @@ fn lower_insn_to_regs>( } Opcode::F64const => { - // TODO use xorpd for 0 + // TODO use xorpd for 0 and cmpeqpd for all 1s. let value = ctx.get_constant(insn).unwrap(); let dst = output_to_reg(ctx, outputs[0]); for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| { @@ -1054,7 +1054,7 @@ fn lower_insn_to_regs>( } Opcode::F32const => { - // TODO use xorps for 0. + // TODO use xorps for 0 and cmpeqps for all 1s. let value = ctx.get_constant(insn).unwrap(); let dst = output_to_reg(ctx, outputs[0]); for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {