machinst x64: implement floating point comparisons

Note that this fixes an encoding issue in which the packed single and packed double prefixes were flipped.
This commit is contained in:
Andrew Brown
2020-08-04 11:12:01 -07:00
parent 3d2e0e55f2
commit 8cfff26957
4 changed files with 142 additions and 82 deletions

View File

@@ -182,7 +182,9 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
match (testsuite, testname) { match (testsuite, testname) {
("simd", "simd_address") => return false, ("simd", "simd_address") => return false,
("simd", "simd_f32x4_arith") => return false, ("simd", "simd_f32x4_arith") => return false,
("simd", "simd_f32x4_cmp") => return false,
("simd", "simd_f64x2_arith") => return false, ("simd", "simd_f64x2_arith") => return false,
("simd", "simd_f64x2_cmp") => return false,
("simd", "simd_store") => return false, ("simd", "simd_store") => return false,
("simd", _) => return true, ("simd", _) => return true,
_ => {} _ => {}

View File

@@ -1767,8 +1767,8 @@ pub(crate) fn emit(
Inst::XmmRmRImm { op, src, dst, imm } => { Inst::XmmRmRImm { op, src, dst, imm } => {
let prefix = match op { let prefix = match op {
SseOpcode::Cmpps => LegacyPrefix::_66, SseOpcode::Cmpps => LegacyPrefix::None,
SseOpcode::Cmppd => LegacyPrefix::None, SseOpcode::Cmppd => LegacyPrefix::_66,
SseOpcode::Cmpss => LegacyPrefix::_F3, SseOpcode::Cmpss => LegacyPrefix::_F3,
SseOpcode::Cmpsd => LegacyPrefix::_F2, SseOpcode::Cmpsd => LegacyPrefix::_F2,
_ => unimplemented!("Opcode {:?} not implemented", op), _ => unimplemented!("Opcode {:?} not implemented", op),

View File

@@ -3190,6 +3190,19 @@ fn test_x64_emit() {
"psrlq $1, %xmm3", "psrlq $1, %xmm3",
)); ));
// ========================================================
// XmmRmRImm
insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2),
"660FC2CD02",
"cmppd $2, %xmm5, %xmm1",
));
insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0),
"410FC2FF00",
"cmpps $0, %xmm15, %xmm7",
));
// ======================================================== // ========================================================
// Misc instructions. // Misc instructions.

View File

@@ -855,92 +855,137 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Fcmp => { Opcode::Fcmp => {
let condcode = inst_fp_condcode(ctx.data(insn)).unwrap(); let condcode = inst_fp_condcode(ctx.data(insn)).unwrap();
let input_ty = ctx.input_ty(insn, 0); let input_ty = ctx.input_ty(insn, 0);
let op = match input_ty { if !input_ty.is_vector() {
F32 => SseOpcode::Ucomiss, let op = match input_ty {
F64 => SseOpcode::Ucomisd, F32 => SseOpcode::Ucomiss,
_ => panic!("Bad input type to Fcmp"), F64 => SseOpcode::Ucomisd,
}; _ => panic!("Bad input type to fcmp: {}", input_ty),
};
// Unordered is returned by setting ZF, PF, CF <- 111 // Unordered is returned by setting ZF, PF, CF <- 111
// Greater than by ZF, PF, CF <- 000 // Greater than by ZF, PF, CF <- 000
// Less than by ZF, PF, CF <- 001 // Less than by ZF, PF, CF <- 001
// Equal by ZF, PF, CF <- 100 // Equal by ZF, PF, CF <- 100
// //
// Checking the result of comiss is somewhat annoying because you don't have setcc // Checking the result of comiss is somewhat annoying because you don't have setcc
// instructions that explicitly check simultaneously for the condition (i.e. eq, le, // instructions that explicitly check simultaneously for the condition (i.e. eq, le,
// gt, etc) *and* orderedness. // gt, etc) *and* orderedness.
// //
// So that might mean we need more than one setcc check and then a logical "and" or // So that might mean we need more than one setcc check and then a logical "and" or
// "or" to determine both, in some cases. However knowing that if the parity bit is // "or" to determine both, in some cases. However knowing that if the parity bit is
// set, then the result was considered unordered and knowing that if the parity bit is // set, then the result was considered unordered and knowing that if the parity bit is
// set, then both the ZF and CF flag bits must also be set we can get away with using // set, then both the ZF and CF flag bits must also be set we can get away with using
// one setcc for most condition codes. // one setcc for most condition codes.
match condcode { match condcode {
FloatCC::LessThan FloatCC::LessThan
| FloatCC::LessThanOrEqual | FloatCC::LessThanOrEqual
| FloatCC::UnorderedOrGreaterThan | FloatCC::UnorderedOrGreaterThan
| FloatCC::UnorderedOrGreaterThanOrEqual => { | FloatCC::UnorderedOrGreaterThanOrEqual => {
// setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1 // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1
// which doesn't exclude unorderedness. To get around this we can reverse the // which doesn't exclude unorderedness. To get around this we can reverse the
// operands and the cc test to instead check if CF and ZF are 0 which would // operands and the cc test to instead check if CF and ZF are 0 which would
// also exclude unorderedness. Using similar logic we also reverse // also exclude unorderedness. Using similar logic we also reverse
// UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and ensure that ZF // UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and ensure that ZF
// or CF is 1 to exclude orderedness. // or CF is 1 to exclude orderedness.
let lhs = input_to_reg_mem(ctx, inputs[0]); let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = input_to_reg(ctx, inputs[1]); let rhs = input_to_reg(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]); let dst = output_to_reg(ctx, outputs[0]);
ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs)); ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs));
let condcode = condcode.reverse(); let condcode = condcode.reverse();
let cc = CC::from_floatcc(condcode); let cc = CC::from_floatcc(condcode);
ctx.emit(Inst::setcc(cc, dst)); ctx.emit(Inst::setcc(cc, dst));
}
FloatCC::Equal => {
// Outlier case: equal means both the operands are ordered and equal; we cannot
// get around checking the parity bit to determine if the result was ordered.
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
ctx.emit(Inst::setcc(CC::NP, tmp_gpr1));
ctx.emit(Inst::setcc(CC::Z, dst));
ctx.emit(Inst::alu_rmi_r(
false,
AluRmiROpcode::And,
RegMemImm::reg(tmp_gpr1.to_reg()),
dst,
));
}
FloatCC::NotEqual => {
// Outlier case: not equal means either the operands are unordered, or they're
// not the same value.
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
ctx.emit(Inst::setcc(CC::P, tmp_gpr1));
ctx.emit(Inst::setcc(CC::NZ, dst));
ctx.emit(Inst::alu_rmi_r(
false,
AluRmiROpcode::Or,
RegMemImm::reg(tmp_gpr1.to_reg()),
dst,
));
}
_ => {
// For all remaining condition codes we can handle things with one check.
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let cc = CC::from_floatcc(condcode);
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
ctx.emit(Inst::setcc(cc, dst));
}
} }
} else {
let op = match input_ty {
types::F32X4 => SseOpcode::Cmpps,
types::F64X2 => SseOpcode::Cmppd,
_ => panic!("Bad input type to fcmp: {}", input_ty),
};
FloatCC::Equal => { // Since some packed comparisons are not available, some of the condition codes
// Outlier case: equal means both the operands are ordered and equal; we cannot // must be inverted, with a corresponding `flip` of the operands.
// get around checking the parity bit to determine if the result was ordered. let (imm, flip) = match condcode {
let lhs = input_to_reg(ctx, inputs[0]); FloatCC::GreaterThan => (FcmpImm::LessThan, true),
let rhs = input_to_reg_mem(ctx, inputs[1]); FloatCC::GreaterThanOrEqual => (FcmpImm::LessThanOrEqual, true),
let dst = output_to_reg(ctx, outputs[0]); FloatCC::UnorderedOrLessThan => (FcmpImm::UnorderedOrGreaterThan, true),
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); FloatCC::UnorderedOrLessThanOrEqual => {
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); (FcmpImm::UnorderedOrGreaterThanOrEqual, true)
ctx.emit(Inst::setcc(CC::NP, tmp_gpr1)); }
ctx.emit(Inst::setcc(CC::Z, dst)); FloatCC::OrderedNotEqual | FloatCC::UnorderedOrEqual => {
ctx.emit(Inst::alu_rmi_r( panic!("unsupported float condition code: {}", condcode)
false, }
AluRmiROpcode::And, _ => (FcmpImm::from(condcode), false),
RegMemImm::reg(tmp_gpr1.to_reg()), };
dst,
));
}
FloatCC::NotEqual => { // Determine the operands of the comparison, possibly by flipping them.
// Outlier case: not equal means either the operands are unordered, or they're let (lhs, rhs) = if flip {
// not the same value. (
let lhs = input_to_reg(ctx, inputs[0]); input_to_reg(ctx, inputs[1]),
let rhs = input_to_reg_mem(ctx, inputs[1]); input_to_reg_mem(ctx, inputs[0]),
let dst = output_to_reg(ctx, outputs[0]); )
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); } else {
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); (
ctx.emit(Inst::setcc(CC::P, tmp_gpr1)); input_to_reg(ctx, inputs[0]),
ctx.emit(Inst::setcc(CC::NZ, dst)); input_to_reg_mem(ctx, inputs[1]),
ctx.emit(Inst::alu_rmi_r( )
false, };
AluRmiROpcode::Or,
RegMemImm::reg(tmp_gpr1.to_reg()),
dst,
));
}
_ => { // Move the `lhs` to the same register as `dst`; this may not emit an actual move
// For all remaining condition codes we can handle things with one check. // but ensures that the registers are the same to match x86's read-write operand
let lhs = input_to_reg(ctx, inputs[0]); // encoding.
let rhs = input_to_reg_mem(ctx, inputs[1]); let dst = output_to_reg(ctx, outputs[0]);
let dst = output_to_reg(ctx, outputs[0]); ctx.emit(Inst::gen_move(dst, lhs, input_ty));
let cc = CC::from_floatcc(condcode);
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); // Emit the comparison.
ctx.emit(Inst::setcc(cc, dst)); ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode()));
}
} }
} }