Add support for 32 bit and 64 bit fcmp for the new backend

Implements ucomiss and ucomisd.
2020-07-10 18:04:16 -07:00
parent fbc05faa49
commit a7cedf3100
5 changed files with 201 additions and 7 deletions
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -5,7 +5,7 @@ use std::string::{String, ToString};
 use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper};
-use crate::ir::condcodes::IntCC;
+use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::machinst::*;
 use super::{
@@ -636,6 +636,12 @@ pub enum CC {
    LE = 14,
    /// > signed
    NLE = 15,
    /// parity
    P = 10,
    /// not parity
    NP = 11,
 }
 impl CC {
@@ -678,6 +684,33 @@ impl CC {
            CC::LE => CC::NLE,
            CC::NLE => CC::LE,
            CC::P => CC::NP,
            CC::NP => CC::P,
        }
    }
    /// Maps a floating-point condition code to the single x86 condition code that tests it
    /// after a `ucomiss`/`ucomisd` comparison.
    ///
    /// The comparison reports its result in `ZF, PF, CF`:
    /// unordered = `111`, greater than = `000`, less than = `001`, equal = `100`.
    /// A condition can only be expressed with one `CC` when a single flag test yields the
    /// right answer for all four outcomes, including the unordered one.
    ///
    /// # Panics
    ///
    /// Panics for conditions that no single flag test can express. Callers must handle
    /// these themselves, either by swapping the compare operands and reversing the
    /// condition, or by combining two flag tests:
    ///
    /// * `Equal`: `ZF = 1` is also true for unordered operands.
    /// * `NotEqual`: must be true for unordered operands, but `ZF = 0` is false for them.
    /// * `LessThan` / `LessThanOrEqual`: `CF = 1` is also true for unordered operands.
    /// * `UnorderedOrGreaterThan` / `UnorderedOrGreaterThanOrEqual`: `CF = 0` is false for
    ///   unordered operands.
    pub(crate) fn from_floatcc(floatcc: FloatCC) -> Self {
        match floatcc {
            FloatCC::Ordered => CC::NP,
            FloatCC::Unordered => CC::P,
            // Alias for NE. ZF = 0 only holds for the ordered less-than / greater-than
            // outcomes, so this is exactly "ordered and not equal".
            FloatCC::OrderedNotEqual => CC::NZ,
            // Alias for E. ZF = 1 holds for both the equal and the unordered outcome.
            FloatCC::UnorderedOrEqual => CC::Z,
            // Alias for A
            FloatCC::GreaterThan => CC::NBE,
            // Alias for AE
            FloatCC::GreaterThanOrEqual => CC::NB,
            FloatCC::UnorderedOrLessThan => CC::B,
            FloatCC::UnorderedOrLessThanOrEqual => CC::BE,
            FloatCC::Equal
            | FloatCC::NotEqual
            | FloatCC::LessThan
            | FloatCC::LessThanOrEqual
            | FloatCC::UnorderedOrGreaterThan
            | FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(
                "{:?} has no single condition code that also handles unordered operands. \
                 Treat as special case.",
                floatcc
            ),
        }
    }
@@ -703,6 +736,8 @@ impl fmt::Debug for CC {
            CC::NL => "nl",
            CC::LE => "le",
            CC::NLE => "nle",
            CC::P => "p",
            CC::NP => "np",
        };
        write!(fmt, "{}", name)
    }
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1603,6 +1603,25 @@ pub(crate) fn emit(
            }
        }
        // Scalar float compare (`ucomiss` / `ucomisd`) with a register or memory source.
        Inst::XMM_Cmp_RM_R { op, src, dst } => {
            // Start from REX flags with the W bit cleared.
            let rex = RexFlags::clear_w();
            // Both supported opcodes share the bytes 0F 2E; the double-precision form is
            // distinguished only by the 0x66 legacy prefix.
            let (prefix, opcode) = match op {
                SseOpcode::Ucomisd => (LegacyPrefix::_66, 0x0F2E),
                SseOpcode::Ucomiss => (LegacyPrefix::None, 0x0F2E),
                // Any other SSE opcode is not supported by this arm.
                _ => unimplemented!("Emit xmm cmp rm r"),
            };
            // NOTE(review): the literal `2` is presumably the number of opcode bytes
            // (0F 2E) expected by the emit_std_* helpers -- confirm against their signature.
            match src {
                RegMem::Reg { reg } => {
                    emit_std_reg_reg(sink, prefix, opcode, 2, *dst, *reg, rex);
                }
                RegMem::Mem { addr } => {
                    // Resolve the address against the current emission state before encoding.
                    let addr = &addr.finalize(state);
                    emit_std_reg_mem(sink, prefix, opcode, 2, *dst, addr, rex);
                }
            }
        }
        Inst::LoadExtName {
            dst,
            name,
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -2711,7 +2711,8 @@ fn test_x64_emit() {
    insns.push((Inst::setcc(CC::NLE, w_rsi), "400F9FC6", "setnle  %sil"));
    insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz    %r14b"));
    insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle   %r14b"));
-
+    insns.push((Inst::setcc(CC::P, w_r9), "410F9AC1", "setp    %r9b"));
    insns.push((Inst::setcc(CC::NP, w_r8), "410F9BC0", "setnp   %r8b"));
    // ========================================================
    // Cmove
    insns.push((
@@ -2876,6 +2877,33 @@ fn test_x64_emit() {
        "jmp     *321(%r10,%rdx,4)",
    ));
    // ========================================================
    // XMM_CMP_RM_R
    insns.push((
        Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm1), xmm2),
        "0F2ED1",
        "ucomiss %xmm1, %xmm2",
    ));
    insns.push((
        Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm0), xmm9),
        "440F2EC8",
        "ucomiss %xmm0, %xmm9",
    ));
    insns.push((
        Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm13), xmm4),
        "66410F2EE5",
        "ucomisd %xmm13, %xmm4",
    ));
    insns.push((
        Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm11), xmm12),
        "66450F2EE3",
        "ucomisd %xmm11, %xmm12",
    ));
    // ========================================================
    // XMM_RM_R: float binary ops
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -244,6 +244,13 @@ pub enum Inst {
        dst: Writable<Reg>,
    },
    /// Float comparisons/tests: (ucomiss ucomisd) (reg addr) reg.
    XMM_Cmp_RM_R {
        op: SseOpcode,
        src: RegMem,
        dst: Reg,
    },
    // =====================================
    // Control flow instructions.
    /// Direct call: call simm32.
@@ -480,6 +487,12 @@ impl Inst {
        Inst::GprToXmm { op, src, dst }
    }
    pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst {
        //TODO:: Add assert_reg_type helper
        debug_assert!(dst.get_class() == RegClass::V128);
        Inst::XMM_Cmp_RM_R { op, src, dst }
    }
    pub(crate) fn movzx_rm_r(
        ext_mode: ExtMode,
        src: RegMem,
@@ -859,6 +872,12 @@ impl ShowWithRRU for Inst {
                )
            }
            // Pretty-prints the float compare as `<op> <src>, <dst>`: the justified opcode
            // name, then the source operand, then the destination register.
            Inst::XMM_Cmp_RM_R { op, src, dst } => format!(
                "{} {}, {}",
                ljustify(op.to_string()),
                src.show_rru_sized(mb_rru, 8),
                show_ireg_sized(*dst, mb_rru, 8),
            ),
            Inst::Imm_R {
                dst_is_64,
                simm64,
@@ -1117,6 +1136,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_use(*src);
            dst.get_regs_as_uses(collector);
        }
        // Float compare: both operands are recorded as uses and nothing is recorded as a
        // def, i.e. `dst` is only read here despite its name.
        Inst::XMM_Cmp_RM_R { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_use(*dst);
        }
        Inst::Imm_R { dst, .. } => {
            collector.add_def(*dst);
        }
@@ -1332,6 +1355,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_use(mapper, src);
            dst.map_uses(mapper);
        }
        // Float compare: both operands are remapped as uses, mirroring how they are
        // reported to the register-usage collector.
        Inst::XMM_Cmp_RM_R {
            ref mut src,
            ref mut dst,
            ..
        } => {
            src.map_uses(mapper);
            map_use(mapper, dst);
        }
        Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst),
        Inst::Mov_R_R {
            ref mut src,
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -6,14 +6,14 @@ use log::trace;
 use regalloc::{Reg, RegClass, Writable};
 use smallvec::SmallVec;
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use std::convert::TryFrom;
 use crate::ir::types;
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
-use crate::ir::{condcodes::IntCC, InstructionData, Opcode, TrapCode, Type};
+use crate::ir::{condcodes::FloatCC, condcodes::IntCC, InstructionData, Opcode, TrapCode, Type};
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use cranelift_codegen_shared::condcodes::CondCode;
 use std::convert::TryFrom;
 use crate::machinst::lower::*;
 use crate::machinst::*;
@@ -95,6 +95,16 @@ fn inst_condcode(data: &InstructionData) -> IntCC {
    }
 }
 /// Extracts the floating-point condition code carried by `data`.
 ///
 /// Returns `Some(cond)` for the `BranchFloat`, `FloatCompare`, `FloatCond` and
 /// `FloatCondTrap` instruction formats, and `None` for every other format.
 fn inst_fp_condcode(data: &InstructionData) -> Option<FloatCC> {
    let cond = match *data {
        InstructionData::BranchFloat { cond, .. }
        | InstructionData::FloatCompare { cond, .. }
        | InstructionData::FloatCond { cond, .. }
        | InstructionData::FloatCondTrap { cond, .. } => cond,
        // Every other instruction format carries no float condition code.
        _ => return None,
    };
    Some(cond)
 }
 fn ldst_offset(data: &InstructionData) -> Option<i32> {
    match data {
        &InstructionData::Load { offset, .. }
@@ -734,6 +744,77 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(Inst::setcc(cc, dst));
        }
        // Lowers a floating-point compare to `ucomiss`/`ucomisd` followed by one or two
        // `setcc` instructions that materialize the boolean result in the output register.
        Opcode::Fcmp => {
            let condcode = inst_fp_condcode(ctx.data(insn))
                .expect("Fcmp instruction must carry a float condition code");
            let input_ty = ctx.input_ty(insn, 0);
            let op = match input_ty {
                F32 => SseOpcode::Ucomiss,
                F64 => SseOpcode::Ucomisd,
                _ => panic!("Bad input type to Fcmp"),
            };
            // Unordered is returned by setting ZF, PF, CF <- 111
            // Greater than by ZF, PF, CF <- 000
            // Less than by ZF, PF, CF <- 001
            // Equal by ZF, PF, CF <- 100
            //
            // Checking the result of ucomiss/ucomisd is somewhat annoying because there are
            // no setcc instructions that simultaneously check for the condition (i.e. eq,
            // le, gt, etc.) and for orderedness. So we might need more than one setcc
            // check and then a logical "and" or "or" to determine both. However, knowing
            // that the parity bit is set exactly when the result is unordered, and that in
            // this case both the ZF and CF flag bits are also set, we can get away with
            // using one setcc for most condition codes.
            match condcode {
                // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1,
                // which doesn't exclude unorderedness. To get around this we can reverse
                // the operands and the cc test to instead check that CF (and ZF) are 0,
                // which also excludes unorderedness. Using similar logic we also reverse
                // UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and check that
                // CF (or ZF) is 1, which also holds for an unordered result.
                FloatCC::LessThan
                | FloatCC::LessThanOrEqual
                | FloatCC::UnorderedOrGreaterThan
                | FloatCC::UnorderedOrGreaterThanOrEqual => {
                    let lhs = input_to_reg_mem(ctx, inputs[0]);
                    let rhs = input_to_reg(ctx, inputs[1]);
                    let dst = output_to_reg(ctx, outputs[0]);
                    // Operands are deliberately swapped relative to the other arms.
                    ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs));
                    let condcode = condcode.reverse();
                    let cc = CC::from_floatcc(condcode);
                    ctx.emit(Inst::setcc(cc, dst));
                }
                // Outlier case where we cannot get around checking the parity bit to
                // determine if the result was ordered: ZF = 1 alone is also true for an
                // unordered result, so the answer is "ZF = 1 and PF = 0".
                FloatCC::Equal => {
                    let lhs = input_to_reg(ctx, inputs[0]);
                    let rhs = input_to_reg_mem(ctx, inputs[1]);
                    let dst = output_to_reg(ctx, outputs[0]);
                    let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32);
                    ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
                    ctx.emit(Inst::setcc(CC::NP, tmp_gpr1));
                    ctx.emit(Inst::setcc(CC::Z, dst));
                    ctx.emit(Inst::alu_rmi_r(
                        false,
                        AluRmiROpcode::And,
                        RegMemImm::reg(tmp_gpr1.to_reg()),
                        dst,
                    ));
                }
                // The mirror-image outlier: NotEqual must be true when the operands are
                // unordered, but ZF = 0 alone is false in that case (unordered sets ZF).
                // The answer is therefore "ZF = 0 or PF = 1".
                FloatCC::NotEqual => {
                    let lhs = input_to_reg(ctx, inputs[0]);
                    let rhs = input_to_reg_mem(ctx, inputs[1]);
                    let dst = output_to_reg(ctx, outputs[0]);
                    let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32);
                    ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
                    ctx.emit(Inst::setcc(CC::P, tmp_gpr1));
                    ctx.emit(Inst::setcc(CC::NZ, dst));
                    ctx.emit(Inst::alu_rmi_r(
                        false,
                        AluRmiROpcode::Or,
                        RegMemImm::reg(tmp_gpr1.to_reg()),
                        dst,
                    ));
                }
                // For all remaining condition codes we can handle things with one check.
                // OrderedNotEqual, for example, does not need a separate check of the
                // parity bit because setnz checks that the zero flag is 0, which is
                // impossible with an unordered result.
                _ => {
                    let lhs = input_to_reg(ctx, inputs[0]);
                    let rhs = input_to_reg_mem(ctx, inputs[1]);
                    let dst = output_to_reg(ctx, outputs[0]);
                    let cc = CC::from_floatcc(condcode);
                    ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
                    ctx.emit(Inst::setcc(cc, dst));
                }
            }
        }
        Opcode::FallthroughReturn | Opcode::Return => {
            for i in 0..ctx.num_inputs(insn) {
                let src_reg = input_to_reg(ctx, inputs[i]);