Add Intel legalization for fmin and fmax.

The native x86_fmin and x86_fmax instructions don't behave correctly for NaN inputs and when comparing +0.0 to -0.0, so we need separate branches for those cases.
2017-09-27 11:02:57 -07:00
parent 384b04b411
commit b6b474a8c9
5 changed files with 136 additions and 5 deletions
--- a/lib/cretonne/src/isa/intel/enc_tables.rs
+++ b/lib/cretonne/src/isa/intel/enc_tables.rs
@@ -1,7 +1,9 @@
 //! Encoding tables for Intel ISAs.

 use bitset::BitSet;
-use ir;
+use cursor::{Cursor, FuncCursor};
+use flowgraph::ControlFlowGraph;
+use ir::{self, InstBuilder};
 use isa::constraints::*;
 use isa::enc_tables::*;
 use isa::encoding::RecipeSizing;
@@ -11,3 +13,91 @@ use super::registers::*;

 include!(concat!(env!("OUT_DIR"), "/encoding-intel.rs"));
 include!(concat!(env!("OUT_DIR"), "/legalize-intel.rs"));
+
+/// Expand the `fmin` and `fmax` instructions using the Intel `x86_fmin` and `x86_fmax`
+/// instructions.
+fn expand_minmax(inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph) {
+    use ir::condcodes::FloatCC;
+
+    let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Fmin,
+            args,
+        } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Fmax,
+            args,
+        } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
+        _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
+    };
+    let old_ebb = func.layout.pp_ebb(inst);
+
+    // We need to handle the following conditions, depending on how x and y compare:
+    //
+    // 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
+    // 2. EQ: We need to use `bitwise_opc` to make sure that
+    //    fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
+    // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
+
+    // EBB handling case 3) where one operand is NaN.
+    let uno_ebb = func.dfg.make_ebb();
+
+    // EBB that handles the unordered or equal cases 2) and 3).
+    let ueq_ebb = func.dfg.make_ebb();
+
+    // Final EBB with one argument representing the final result value.
+    let done = func.dfg.make_ebb();
+
+    // The basic blocks are laid out to minimize branching for the common cases:
+    //
+    // 1) One branch not taken, one jump.
+    // 2) One branch taken.
+    // 3) Two branches taken, one jump.
+
+    // Move the `inst` result value onto the `done` EBB.
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+    func.dfg.clear_results(inst);
+    func.dfg.attach_ebb_arg(done, result);
+
+    // Test for case 1) ordered and not equal.
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
+    pos.ins().brnz(cmp_ueq, ueq_ebb, &[]);
+
+    // Handle the common ordered, not equal (LT|GT) case.
+    let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
+    let one_result = pos.func.dfg.first_result(one_inst);
+    pos.ins().jump(done, &[one_result]);
+
+    // Case 3) Unordered.
+    // We know that at least one operand is a NaN that needs to be propagated. We simply use an
+    // `fadd` instruction which has the same NaN propagation semantics.
+    pos.insert_ebb(uno_ebb);
+    let uno_result = pos.ins().fadd(x, y);
+    pos.ins().jump(done, &[uno_result]);
+
+    // Case 2) or 3).
+    pos.insert_ebb(ueq_ebb);
+    // Test for case 3) (UN) one value is NaN.
+    // TODO: When we get support for flag values, we can reuse the above comparison.
+    let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
+    pos.ins().brnz(cmp_uno, uno_ebb, &[]);
+
+    // We are now in case 2) where x and y compare EQ.
+    // We need a bitwise operation to get the sign right.
+    let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
+    let bw_result = pos.func.dfg.first_result(bw_inst);
+    // This should become a fall-through for this second most common case.
+    // Recycle the original instruction as a jump.
+    pos.func.dfg.replace(inst).jump(done, &[bw_result]);
+
+    // Finally insert a label for the completion.
+    pos.next_inst();
+    pos.insert_ebb(done);
+
+    cfg.recompute_ebb(pos.func, old_ebb);
+    cfg.recompute_ebb(pos.func, ueq_ebb);
+    cfg.recompute_ebb(pos.func, uno_ebb);
+    cfg.recompute_ebb(pos.func, done);
+}