Add Intel encodings for the fcmp instruction.

Not all floating point condition codes are directly supported by the ucomiss/ucomisd instructions. Some inequalities need to be reversed, and eq and ne each require two separate tests.
2017-09-26 09:54:54 -07:00
parent 79968a2325
commit 7fb6159a85
9 changed files with 342 additions and 10 deletions
--- a/lib/cretonne/meta/isa/intel/legalize.py
+++ b/lib/cretonne/meta/isa/intel/legalize.py
@@ -4,7 +4,7 @@ Custom legalization patterns for Intel.
 from __future__ import absolute_import
 from cdsl.ast import Var
 from cdsl.xform import Rtl, XFormGroup
-from base.immediates import imm64
+from base.immediates import imm64, floatcc
 from base.types import i32, i64
 from base import legalize as shared
 from base import instructions as insts
@@ -25,6 +25,8 @@ dead = Var('dead')
 x = Var('x')
 xhi = Var('xhi')
 y = Var('y')
+a1 = Var('a1')
+a2 = Var('a2')

 #
 # Division and remainder.
@@ -56,3 +58,37 @@ for ty in [i32, i64]:
                xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)),
                (dead, a) << x86.sdivmodx(x, xhi, y)
            ))
+
+# Floating point condition codes.
+#
+# The 8 condition codes in `supported_floatccs` are directly supported by a
+# `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
+# patterns.
+
+# Equality needs an explicit `ord` test which checks the parity bit.
+intel_expand.legalize(
+        a << insts.fcmp(floatcc.eq, x, y),
+        Rtl(
+            a1 << insts.fcmp(floatcc.ord, x, y),
+            a2 << insts.fcmp(floatcc.ueq, x, y),
+            a << insts.band(a1, a2)
+        ))
+intel_expand.legalize(
+        a << insts.fcmp(floatcc.ne, x, y),
+        Rtl(
+            a1 << insts.fcmp(floatcc.uno, x, y),
+            a2 << insts.fcmp(floatcc.one, x, y),
+            a << insts.bor(a1, a2)
+        ))
+
+# Inequalities that need to be reversed.
+for cc,               rev_cc in [
+        (floatcc.lt,  floatcc.gt),
+        (floatcc.le,  floatcc.ge),
+        (floatcc.ugt, floatcc.ult),
+        (floatcc.uge, floatcc.ule)]:
+    intel_expand.legalize(
+            a << insts.fcmp(cc, x, y),
+            Rtl(
+                a << insts.fcmp(rev_cc, y, x)
+            ))