machinst x64: implement floating point comparisons

Note that this also fixes an encoding bug in which the legacy prefixes for the packed-single (`cmpps`) and packed-double (`cmppd`) comparisons were swapped.
2020-08-04 11:12:01 -07:00
parent 3d2e0e55f2
commit 8cfff26957
4 changed files with 142 additions and 82 deletions
--- a/build.rs
+++ b/build.rs
@@ -182,7 +182,9 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
    match (testsuite, testname) {
        ("simd", "simd_address") => return false,
        ("simd", "simd_f32x4_arith") => return false,
        ("simd", "simd_f32x4_cmp") => return false,
        ("simd", "simd_f64x2_arith") => return false,
        ("simd", "simd_f64x2_cmp") => return false,
        ("simd", "simd_store") => return false,
        ("simd", _) => return true,
        _ => {}
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1767,8 +1767,8 @@ pub(crate) fn emit(
        Inst::XmmRmRImm { op, src, dst, imm } => {
            let prefix = match op {
-                SseOpcode::Cmpps => LegacyPrefix::_66,
+                SseOpcode::Cmpps => LegacyPrefix::None,
-                SseOpcode::Cmppd => LegacyPrefix::None,
+                SseOpcode::Cmppd => LegacyPrefix::_66,
                SseOpcode::Cmpss => LegacyPrefix::_F3,
                SseOpcode::Cmpsd => LegacyPrefix::_F2,
                _ => unimplemented!("Opcode {:?} not implemented", op),
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -3190,6 +3190,19 @@ fn test_x64_emit() {
        "psrlq   $1, %xmm3",
    ));
    // ========================================================
    // XmmRmRImm
    insns.push((
        Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2),
        "660FC2CD02",
        "cmppd   $2, %xmm5, %xmm1",
    ));
    insns.push((
        Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0),
        "410FC2FF00",
        "cmpps   $0, %xmm15, %xmm7",
    ));
    // ========================================================
    // Misc instructions.
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -855,10 +855,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        Opcode::Fcmp => {
            let condcode = inst_fp_condcode(ctx.data(insn)).unwrap();
            let input_ty = ctx.input_ty(insn, 0);
            if !input_ty.is_vector() {
                let op = match input_ty {
                    F32 => SseOpcode::Ucomiss,
                    F64 => SseOpcode::Ucomisd,
-                _ => panic!("Bad input type to Fcmp"),
+                    _ => panic!("Bad input type to fcmp: {}", input_ty),
                };
                // Unordered is returned by setting ZF, PF, CF <- 111
@@ -942,6 +943,50 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                        ctx.emit(Inst::setcc(cc, dst));
                    }
                }
            } else {
                let op = match input_ty {
                    types::F32X4 => SseOpcode::Cmpps,
                    types::F64X2 => SseOpcode::Cmppd,
                    _ => panic!("Bad input type to fcmp: {}", input_ty),
                };
                // Since some packed comparisons are not available, some of the condition codes
                // must be mirrored (e.g. `a > b` becomes `b < a`), with a `flip` of the operands.
                let (imm, flip) = match condcode {
                    FloatCC::GreaterThan => (FcmpImm::LessThan, true),
                    FloatCC::GreaterThanOrEqual => (FcmpImm::LessThanOrEqual, true),
                    FloatCC::UnorderedOrLessThan => (FcmpImm::UnorderedOrGreaterThan, true),
                    FloatCC::UnorderedOrLessThanOrEqual => {
                        (FcmpImm::UnorderedOrGreaterThanOrEqual, true)
                    }
                    FloatCC::OrderedNotEqual | FloatCC::UnorderedOrEqual => {
                        panic!("unsupported float condition code: {}", condcode)
                    }
                    _ => (FcmpImm::from(condcode), false),
                };
                // Determine the operands of the comparison, possibly by flipping them.
                let (lhs, rhs) = if flip {
                    (
                        input_to_reg(ctx, inputs[1]),
                        input_to_reg_mem(ctx, inputs[0]),
                    )
                } else {
                    (
                        input_to_reg(ctx, inputs[0]),
                        input_to_reg_mem(ctx, inputs[1]),
                    )
                };
                // Move the `lhs` to the same register as `dst`; this may not emit an actual move
                // but ensures that the registers are the same to match x86's read-write operand
                // encoding.
                let dst = output_to_reg(ctx, outputs[0]);
                ctx.emit(Inst::gen_move(dst, lhs, input_ty));
                // Emit the comparison.
                ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode()));
            }
        }
        Opcode::FallthroughReturn | Opcode::Return => {