From 8cfff2695713bce54c465a16005663aa143c7385 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 4 Aug 2020 11:12:01 -0700 Subject: [PATCH] machinst x64: implement floating point comparisons Note that this fixes an encoding issue in which the packed single and packed double prefixes were flipped. --- build.rs | 2 + cranelift/codegen/src/isa/x64/inst/emit.rs | 4 +- .../codegen/src/isa/x64/inst/emit_tests.rs | 13 ++ cranelift/codegen/src/isa/x64/lower.rs | 205 +++++++++++------- 4 files changed, 142 insertions(+), 82 deletions(-) diff --git a/build.rs b/build.rs index 200f532b2a..42e152604a 100644 --- a/build.rs +++ b/build.rs @@ -182,7 +182,9 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str match (testsuite, testname) { ("simd", "simd_address") => return false, ("simd", "simd_f32x4_arith") => return false, + ("simd", "simd_f32x4_cmp") => return false, ("simd", "simd_f64x2_arith") => return false, + ("simd", "simd_f64x2_cmp") => return false, ("simd", "simd_store") => return false, ("simd", _) => return true, _ => {} diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 26fd5a8947..764d34af6d 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1767,8 +1767,8 @@ pub(crate) fn emit( Inst::XmmRmRImm { op, src, dst, imm } => { let prefix = match op { - SseOpcode::Cmpps => LegacyPrefix::_66, - SseOpcode::Cmppd => LegacyPrefix::None, + SseOpcode::Cmpps => LegacyPrefix::None, + SseOpcode::Cmppd => LegacyPrefix::_66, SseOpcode::Cmpss => LegacyPrefix::_F3, SseOpcode::Cmpsd => LegacyPrefix::_F2, _ => unimplemented!("Opcode {:?} not implemented", op), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index b966398804..e0f26c1966 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3190,6 +3190,19 @@ fn test_x64_emit() { "psrlq $1, %xmm3", )); + // ======================================================== + // XmmRmRImm + insns.push(( + Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2), + "660FC2CD02", + "cmppd $2, %xmm5, %xmm1", + )); + insns.push(( + Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0), + "410FC2FF00", + "cmpps $0, %xmm15, %xmm7", + )); + // ======================================================== // Misc instructions. diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 2d4a22933e..b84680245c 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -855,92 +855,137 @@ fn lower_insn_to_regs>( Opcode::Fcmp => { let condcode = inst_fp_condcode(ctx.data(insn)).unwrap(); let input_ty = ctx.input_ty(insn, 0); - let op = match input_ty { - F32 => SseOpcode::Ucomiss, - F64 => SseOpcode::Ucomisd, - _ => panic!("Bad input type to Fcmp"), - }; + if !input_ty.is_vector() { + let op = match input_ty { + F32 => SseOpcode::Ucomiss, + F64 => SseOpcode::Ucomisd, + _ => panic!("Bad input type to fcmp: {}", input_ty), + }; - // Unordered is returned by setting ZF, PF, CF <- 111 - // Greater than by ZF, PF, CF <- 000 - // Less than by ZF, PF, CF <- 001 - // Equal by ZF, PF, CF <- 100 - // - // Checking the result of comiss is somewhat annoying because you don't have setcc - // instructions that explicitly check simultaneously for the condition (i.e. eq, le, - // gt, etc) *and* orderedness. - // - // So that might mean we need more than one setcc check and then a logical "and" or - // "or" to determine both, in some cases. However knowing that if the parity bit is - // set, then the result was considered unordered and knowing that if the parity bit is - // set, then both the ZF and CF flag bits must also be set we can get away with using - // one setcc for most condition codes. + // Unordered is returned by setting ZF, PF, CF <- 111 + // Greater than by ZF, PF, CF <- 000 + // Less than by ZF, PF, CF <- 001 + // Equal by ZF, PF, CF <- 100 + // + // Checking the result of comiss is somewhat annoying because you don't have setcc + // instructions that explicitly check simultaneously for the condition (i.e. eq, le, + // gt, etc) *and* orderedness. + // + // So that might mean we need more than one setcc check and then a logical "and" or + // "or" to determine both, in some cases. However knowing that if the parity bit is + // set, then the result was considered unordered and knowing that if the parity bit is + // set, then both the ZF and CF flag bits must also be set we can get away with using + // one setcc for most condition codes. - match condcode { - FloatCC::LessThan - | FloatCC::LessThanOrEqual - | FloatCC::UnorderedOrGreaterThan - | FloatCC::UnorderedOrGreaterThanOrEqual => { - // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1 - // which doesn't exclude unorderdness. To get around this we can reverse the - // operands and the cc test to instead check if CF and ZF are 0 which would - // also excludes unorderedness. Using similiar logic we also reverse - // UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and assure that ZF - // or CF is 1 to exclude orderedness. - let lhs = input_to_reg_mem(ctx, inputs[0]); - let rhs = input_to_reg(ctx, inputs[1]); - let dst = output_to_reg(ctx, outputs[0]); - ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs)); - let condcode = condcode.reverse(); - let cc = CC::from_floatcc(condcode); - ctx.emit(Inst::setcc(cc, dst)); + match condcode { + FloatCC::LessThan + | FloatCC::LessThanOrEqual + | FloatCC::UnorderedOrGreaterThan + | FloatCC::UnorderedOrGreaterThanOrEqual => { + // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1 + // which doesn't exclude unorderdness. To get around this we can reverse the + // operands and the cc test to instead check if CF and ZF are 0 which would + // also excludes unorderedness. Using similiar logic we also reverse + // UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and assure that ZF + // or CF is 1 to exclude orderedness. + let lhs = input_to_reg_mem(ctx, inputs[0]); + let rhs = input_to_reg(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs)); + let condcode = condcode.reverse(); + let cc = CC::from_floatcc(condcode); + ctx.emit(Inst::setcc(cc, dst)); + } + + FloatCC::Equal => { + // Outlier case: equal means both the operands are ordered and equal; we cannot + // get around checking the parity bit to determine if the result was ordered. + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); + ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); + ctx.emit(Inst::setcc(CC::NP, tmp_gpr1)); + ctx.emit(Inst::setcc(CC::Z, dst)); + ctx.emit(Inst::alu_rmi_r( + false, + AluRmiROpcode::And, + RegMemImm::reg(tmp_gpr1.to_reg()), + dst, + )); + } + + FloatCC::NotEqual => { + // Outlier case: not equal means either the operands are unordered, or they're + // not the same value. + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); + ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); + ctx.emit(Inst::setcc(CC::P, tmp_gpr1)); + ctx.emit(Inst::setcc(CC::NZ, dst)); + ctx.emit(Inst::alu_rmi_r( + false, + AluRmiROpcode::Or, + RegMemImm::reg(tmp_gpr1.to_reg()), + dst, + )); + } + + _ => { + // For all remaining condition codes we can handle things with one check. + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + let cc = CC::from_floatcc(condcode); + ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); + ctx.emit(Inst::setcc(cc, dst)); + } } + } else { + let op = match input_ty { + types::F32X4 => SseOpcode::Cmpps, + types::F64X2 => SseOpcode::Cmppd, + _ => panic!("Bad input type to fcmp: {}", input_ty), + }; - FloatCC::Equal => { - // Outlier case: equal means both the operands are ordered and equal; we cannot - // get around checking the parity bit to determine if the result was ordered. - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - let dst = output_to_reg(ctx, outputs[0]); - let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - ctx.emit(Inst::setcc(CC::NP, tmp_gpr1)); - ctx.emit(Inst::setcc(CC::Z, dst)); - ctx.emit(Inst::alu_rmi_r( - false, - AluRmiROpcode::And, - RegMemImm::reg(tmp_gpr1.to_reg()), - dst, - )); - } + // Since some packed comparisons are not available, some of the condition codes + // must be inverted, with a corresponding `flip` of the operands. + let (imm, flip) = match condcode { + FloatCC::GreaterThan => (FcmpImm::LessThan, true), + FloatCC::GreaterThanOrEqual => (FcmpImm::LessThanOrEqual, true), + FloatCC::UnorderedOrLessThan => (FcmpImm::UnorderedOrGreaterThan, true), + FloatCC::UnorderedOrLessThanOrEqual => { + (FcmpImm::UnorderedOrGreaterThanOrEqual, true) + } + FloatCC::OrderedNotEqual | FloatCC::UnorderedOrEqual => { + panic!("unsupported float condition code: {}", condcode) + } + _ => (FcmpImm::from(condcode), false), + }; - FloatCC::NotEqual => { - // Outlier case: not equal means either the operands are unordered, or they're - // not the same value. - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - let dst = output_to_reg(ctx, outputs[0]); - let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - ctx.emit(Inst::setcc(CC::P, tmp_gpr1)); - ctx.emit(Inst::setcc(CC::NZ, dst)); - ctx.emit(Inst::alu_rmi_r( - false, - AluRmiROpcode::Or, - RegMemImm::reg(tmp_gpr1.to_reg()), - dst, - )); - } + // Determine the operands of the comparison, possibly by flipping them. + let (lhs, rhs) = if flip { + ( + input_to_reg(ctx, inputs[1]), + input_to_reg_mem(ctx, inputs[0]), + ) + } else { + ( + input_to_reg(ctx, inputs[0]), + input_to_reg_mem(ctx, inputs[1]), + ) + }; - _ => { - // For all remaining condition codes we can handle things with one check. - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - let dst = output_to_reg(ctx, outputs[0]); - let cc = CC::from_floatcc(condcode); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - ctx.emit(Inst::setcc(cc, dst)); - } + // Move the `lhs` to the same register as `dst`; this may not emit an actual move + // but ensures that the registers are the same to match x86's read-write operand + // encoding. + let dst = output_to_reg(ctx, outputs[0]); + ctx.emit(Inst::gen_move(dst, lhs, input_ty)); + + // Emit the comparison. + ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode())); } }