Add Intel legalization for fmin and fmax.

The native x86_fmin and x86_fmax instructions don't behave correctly for
NaN inputs and when comparing +0.0 to -0.0, so we need separate branches
for those cases.
This commit is contained in:
Jakob Stoklund Olesen
2017-09-27 11:02:57 -07:00
parent 384b04b411
commit b6b474a8c9
5 changed files with 136 additions and 5 deletions

View File

@@ -1,7 +1,9 @@
//! Encoding tables for Intel ISAs.
use bitset::BitSet;
use ir;
use cursor::{Cursor, FuncCursor};
use flowgraph::ControlFlowGraph;
use ir::{self, InstBuilder};
use isa::constraints::*;
use isa::enc_tables::*;
use isa::encoding::RecipeSizing;
@@ -11,3 +13,91 @@ use super::registers::*;
include!(concat!(env!("OUT_DIR"), "/encoding-intel.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-intel.rs"));
/// Expand the `fmin` and `fmax` instructions using the Intel `x86_fmin` and `x86_fmax`
/// instructions.
fn expand_minmax(inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph) {
use ir::condcodes::FloatCC;
let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmin,
args,
} => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmax,
args,
} => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
_ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
// We need to handle the following conditions, depending on how x and y compare:
//
// 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
// 2. EQ: We need to use `bitwise_opc` to make sure that
// fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
// 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
// EBB handling case 3) where one operand is NaN.
let uno_ebb = func.dfg.make_ebb();
// EBB that handles the unordered or equal cases 2) and 3).
let ueq_ebb = func.dfg.make_ebb();
// Final EBB with one argument representing the final result value.
let done = func.dfg.make_ebb();
// The basic blocks are laid out to minimize branching for the common cases:
//
// 1) One branch not taken, one jump.
// 2) One branch taken.
// 3) Two branches taken, one jump.
// Move the `inst` result value onto the `done` EBB.
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
func.dfg.clear_results(inst);
func.dfg.attach_ebb_arg(done, result);
// Test for case 1) ordered and not equal.
let mut pos = FuncCursor::new(func).at_inst(inst);
let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
pos.ins().brnz(cmp_ueq, ueq_ebb, &[]);
// Handle the common ordered, not equal (LT|GT) case.
let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
let one_result = pos.func.dfg.first_result(one_inst);
pos.ins().jump(done, &[one_result]);
// Case 3) Unordered.
// We know that at least one operand is a NaN that needs to be propagated. We simply use an
// `fadd` instruction which has the same NaN propagation semantics.
pos.insert_ebb(uno_ebb);
let uno_result = pos.ins().fadd(x, y);
pos.ins().jump(done, &[uno_result]);
// Case 2) or 3).
pos.insert_ebb(ueq_ebb);
// Test for case 3) (UN) one value is NaN.
// TODO: When we get support for flag values, we can reuse the above comparison.
let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
pos.ins().brnz(cmp_uno, uno_ebb, &[]);
// We are now in case 2) where x and y compare EQ.
// We need a bitwise operation to get the sign right.
let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
let bw_result = pos.func.dfg.first_result(bw_inst);
// This should become a fall-through for this second most common case.
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[bw_result]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, ueq_ebb);
cfg.recompute_ebb(pos.func, uno_ebb);
cfg.recompute_ebb(pos.func, done);
}