diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 3bfac07a4d..feb72703ce 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -829,24 +829,27 @@ fn lower_insn_to_regs>( // Less than by ZF, PF, CF <- 001 // Equal by ZF, PF, CF <- 100 // - // Checking the result of comiss is somewhat annoying because you don't - // have setcc instructions that explicitly check simultaneously for the condition - // (i.e. eq, le, gt, etc) and orderedness. So that might mean we need more - // than one setcc check and then a logical "and" or "or" to determine both. - // However knowing that if the parity bit is set, then the result was - // considered unordered and knowing that if the parity bit is set, then both - // the ZF and CF flag bits must also be set we can getaway with using one setcc - // for most condition codes. + // Checking the result of comiss is somewhat annoying because you don't have setcc + // instructions that explicitly check simultaneously for the condition (i.e. eq, le, + // gt, etc) *and* orderedness. + // + // So that might mean we need more than one setcc check and then a logical "and" or + // "or" to determine both, in some cases. However knowing that if the parity bit is + // set, then the result was considered unordered and knowing that if the parity bit is + // set, then both the ZF and CF flag bits must also be set we can get away with using + // one setcc for most condition codes. + match condcode { - // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1 which - // doesn't exclude unorderdness. To get around this we can reverse the operands - // and the cc test to instead check if CF and ZF are 0 which would also excludes - // unorderedness. Using similiar logic we also reverse UnorderedOrGreaterThan and - // UnorderedOrGreaterThanOrEqual and assure that ZF or CF is 1 to exclude orderedness. FloatCC::LessThan | FloatCC::LessThanOrEqual | FloatCC::UnorderedOrGreaterThan | FloatCC::UnorderedOrGreaterThanOrEqual => { + // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1 + // which doesn't exclude unorderdness. To get around this we can reverse the + // operands and the cc test to instead check if CF and ZF are 0 which would + // also excludes unorderedness. Using similiar logic we also reverse + // UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and assure that ZF + // or CF is 1 to exclude orderedness. let lhs = input_to_reg_mem(ctx, inputs[0]); let rhs = input_to_reg(ctx, inputs[1]); let dst = output_to_reg(ctx, outputs[0]); @@ -855,9 +858,10 @@ fn lower_insn_to_regs>( let cc = CC::from_floatcc(condcode); ctx.emit(Inst::setcc(cc, dst)); } - // Outlier case where we cannot get around checking the parity bit to determine - // if the result was ordered. + FloatCC::Equal => { + // Outlier case: equal means both the operands are ordered and equal; we cannot + // get around checking the parity bit to determine if the result was ordered. let lhs = input_to_reg(ctx, inputs[0]); let rhs = input_to_reg_mem(ctx, inputs[1]); let dst = output_to_reg(ctx, outputs[0]); @@ -872,10 +876,27 @@ fn lower_insn_to_regs>( dst, )); } - // For all remaining condition codes we can handle things with one check. Condition - // ordered NotEqual for example does not need a separate check for the parity bit because - // the setnz checks that the zero flag is 0 which is impossible with an unordered result. + + FloatCC::NotEqual => { + // Outlier case: not equal means either the operands are unordered, or they're + // not the same value. + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + let dst = output_to_reg(ctx, outputs[0]); + let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32); + ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); + ctx.emit(Inst::setcc(CC::P, tmp_gpr1)); + ctx.emit(Inst::setcc(CC::NZ, dst)); + ctx.emit(Inst::alu_rmi_r( + false, + AluRmiROpcode::Or, + RegMemImm::reg(tmp_gpr1.to_reg()), + dst, + )); + } + _ => { + // For all remaining condition codes we can handle things with one check. let lhs = input_to_reg(ctx, inputs[0]); let rhs = input_to_reg_mem(ctx, inputs[1]); let dst = output_to_reg(ctx, outputs[0]);