diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index e624670a44..5697d2dbb5 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -5,7 +5,7 @@ use std::string::{String, ToString}; use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper}; -use crate::ir::condcodes::IntCC; +use crate::ir::condcodes::{FloatCC, IntCC}; use crate::machinst::*; use super::{ @@ -636,6 +636,12 @@ pub enum CC { LE = 14, /// > signed NLE = 15, + + /// parity + P = 10, + + /// not parity + NP = 11, } impl CC { @@ -678,6 +684,33 @@ impl CC { CC::LE => CC::NLE, CC::NLE => CC::LE, + + CC::P => CC::NP, + CC::NP => CC::P, + } + } + + pub(crate) fn from_floatcc(floatcc: FloatCC) -> Self { + match floatcc { + FloatCC::Ordered => CC::NP, + FloatCC::Unordered => CC::P, + // Alias for NE + FloatCC::NotEqual | FloatCC::OrderedNotEqual => CC::NZ, + // Alias for E + FloatCC::UnorderedOrEqual => CC::Z, + // Alias for A + FloatCC::GreaterThan => CC::NBE, + // Alias for AE + FloatCC::GreaterThanOrEqual => CC::NB, + FloatCC::UnorderedOrLessThan => CC::B, + FloatCC::UnorderedOrLessThanOrEqual => CC::BE, + FloatCC::Equal + | FloatCC::LessThan + | FloatCC::LessThanOrEqual + | FloatCC::UnorderedOrGreaterThan + | FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!( + "No single condition code to guarantee ordered. Treat as special case." 
+ ), } } @@ -703,6 +736,8 @@ impl fmt::Debug for CC { CC::NL => "nl", CC::LE => "le", CC::NLE => "nle", + CC::P => "p", + CC::NP => "np", }; write!(fmt, "{}", name) } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index d932682a28..2ff58a8844 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1603,6 +1603,25 @@ pub(crate) fn emit( } } + Inst::XMM_Cmp_RM_R { op, src, dst } => { + let rex = RexFlags::clear_w(); + let (prefix, opcode) = match op { + SseOpcode::Ucomisd => (LegacyPrefix::_66, 0x0F2E), + SseOpcode::Ucomiss => (LegacyPrefix::None, 0x0F2E), + _ => unimplemented!("Emit xmm cmp rm r"), + }; + + match src { + RegMem::Reg { reg } => { + emit_std_reg_reg(sink, prefix, opcode, 2, *dst, *reg, rex); + } + RegMem::Mem { addr } => { + let addr = &addr.finalize(state); + emit_std_reg_mem(sink, prefix, opcode, 2, *dst, addr, rex); + } + } + } + Inst::LoadExtName { dst, name, diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 655aad43bc..fc7f4869f5 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2711,7 +2711,8 @@ fn test_x64_emit() { insns.push((Inst::setcc(CC::NLE, w_rsi), "400F9FC6", "setnle %sil")); insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b")); insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b")); - + insns.push((Inst::setcc(CC::P, w_r9), "410F9AC1", "setp %r9b")); + insns.push((Inst::setcc(CC::NP, w_r8), "410F9BC0", "setnp %r8b")); // ======================================================== // Cmove insns.push(( @@ -2876,6 +2877,33 @@ fn test_x64_emit() { "jmp *321(%r10,%rdx,4)", )); + // ======================================================== + // XMM_CMP_RM_R + + insns.push(( + Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm1), xmm2), + "0F2ED1", + "ucomiss %xmm1, %xmm2", 
+ )); + + insns.push(( + Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm0), xmm9), + "440F2EC8", + "ucomiss %xmm0, %xmm9", + )); + + insns.push(( + Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm13), xmm4), + "66410F2EE5", + "ucomisd %xmm13, %xmm4", + )); + + insns.push(( + Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm11), xmm12), + "66450F2EE3", + "ucomisd %xmm11, %xmm12", + )); + // ======================================================== // XMM_RM_R: float binary ops diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 5461f645f2..7b1e0dedfc 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -244,6 +244,13 @@ pub enum Inst { dst: Writable, }, + /// Float comparisons/tests: cmp (b w l q) (reg addr imm) reg. + XMM_Cmp_RM_R { + op: SseOpcode, + src: RegMem, + dst: Reg, + }, + // ===================================== // Control flow instructions. /// Direct call: call simm32. @@ -480,6 +487,12 @@ impl Inst { Inst::GprToXmm { op, src, dst } } + pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst { + //TODO:: Add assert_reg_type helper + debug_assert!(dst.get_class() == RegClass::V128); + Inst::XMM_Cmp_RM_R { op, src, dst } + } + pub(crate) fn movzx_rm_r( ext_mode: ExtMode, src: RegMem, @@ -859,6 +872,12 @@ impl ShowWithRRU for Inst { ) } + Inst::XMM_Cmp_RM_R { op, src, dst } => format!( + "{} {}, {}", + ljustify(op.to_string()), + src.show_rru_sized(mb_rru, 8), + show_ireg_sized(*dst, mb_rru, 8), + ), Inst::Imm_R { dst_is_64, simm64, @@ -1117,6 +1136,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(*src); dst.get_regs_as_uses(collector); } + Inst::XMM_Cmp_RM_R { src, dst, .. } => { + src.get_regs_as_uses(collector); + collector.add_use(*dst); + } Inst::Imm_R { dst, .. 
} => { collector.add_def(*dst); } @@ -1332,6 +1355,14 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_use(mapper, src); dst.map_uses(mapper); } + Inst::XMM_Cmp_RM_R { + ref mut src, + ref mut dst, + .. + } => { + src.map_uses(mapper); + map_use(mapper, dst); + } Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst), Inst::Mov_R_R { ref mut src, diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 44c6edd3c1..b7272c9a78 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -6,14 +6,14 @@ use log::trace; use regalloc::{Reg, RegClass, Writable}; use smallvec::SmallVec; -use alloc::boxed::Box; -use alloc::vec::Vec; -use std::convert::TryFrom; - use crate::ir::types; use crate::ir::types::*; use crate::ir::Inst as IRInst; -use crate::ir::{condcodes::IntCC, InstructionData, Opcode, TrapCode, Type}; +use crate::ir::{condcodes::FloatCC, condcodes::IntCC, InstructionData, Opcode, TrapCode, Type}; +use alloc::boxed::Box; +use alloc::vec::Vec; +use cranelift_codegen_shared::condcodes::CondCode; +use std::convert::TryFrom; use crate::machinst::lower::*; use crate::machinst::*; @@ -95,6 +95,16 @@ fn inst_condcode(data: &InstructionData) -> IntCC { } } +fn inst_fp_condcode(data: &InstructionData) -> Option { + match data { + &InstructionData::BranchFloat { cond, .. } + | &InstructionData::FloatCompare { cond, .. } + | &InstructionData::FloatCond { cond, .. } + | &InstructionData::FloatCondTrap { cond, .. } => Some(cond), + _ => None, + } +} + fn ldst_offset(data: &InstructionData) -> Option { match data { &InstructionData::Load { offset, .. 
} @@ -734,6 +744,77 @@ fn lower_insn_to_regs>( ctx.emit(Inst::setcc(cc, dst)); } + Opcode::Fcmp => { + let condcode = inst_fp_condcode(ctx.data(insn)).unwrap(); + let input_ty = ctx.input_ty(insn, 0); + let op = match input_ty { + F32 => SseOpcode::Ucomiss, + F64 => SseOpcode::Ucomisd, + _ => panic!("Bad input type to Fcmp"), + }; + + // Unordered is returned by setting ZF, PF, CF <- 111 + // Greater than by ZF, PF, CF <- 000 + // Less than by ZF, PF, CF <- 001 + // Equal by ZF, PF, CF <- 100 + // + // Checking the result of comiss is somewhat annoying because you don't + // have setcc instructions that explicitly check simultaneously for the condition + // (i.e. eq, le, gt, etc) and orderedness. So that might mean we need more + // than one setcc check and then a logical "and" or "or" to determine both. + // However knowing that if the parity bit is set, then the result was + // considered unordered and knowing that if the parity bit is set, then both + // the ZF and CF flag bits must also be set we can get away with using one setcc + // for most condition codes. + match condcode { + // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1 which + // doesn't exclude unorderedness. To get around this we can reverse the operands + // and the cc test to instead check if CF and ZF are 0 which would also exclude + // unorderedness. Using similar logic we also reverse UnorderedOrGreaterThan and + // UnorderedOrGreaterThanOrEqual and ensure that ZF or CF is 1 to exclude orderedness.
+ FloatCC::LessThan + | FloatCC::LessThanOrEqual + | FloatCC::UnorderedOrGreaterThan + | FloatCC::UnorderedOrGreaterThanOrEqual => { + let lhs = input_to_reg_mem(ctx, inputs[0]); + let rhs = input_to_reg(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs)); + let condcode = condcode.reverse(); + let cc = CC::from_floatcc(condcode); + ctx.emit(Inst::setcc(cc, dst)); + } + // Outlier case where we cannot get around checking the parity bit to determine + // if the result was ordered. + FloatCC::Equal => { + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); + ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); + ctx.emit(Inst::setcc(CC::NP, tmp_gpr1)); + ctx.emit(Inst::setcc(CC::Z, dst)); + ctx.emit(Inst::alu_rmi_r( + false, + AluRmiROpcode::And, + RegMemImm::reg(tmp_gpr1.to_reg()), + dst, + )); + } + // For all remaining condition codes we can handle things with one check. Condition + // ordered NotEqual for example does not need a separate check for the parity bit because + // the setnz checks that the zero flag is 0 which is impossible with an unordered result. + _ => { + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + let cc = CC::from_floatcc(condcode); + ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); + ctx.emit(Inst::setcc(cc, dst)); + } + } + } + Opcode::FallthroughReturn | Opcode::Return => { for i in 0..ctx.num_inputs(insn) { let src_reg = input_to_reg(ctx, inputs[i]);