Add Intel encodings for the fcmp instruction.

Not all floating point condition codes are directly supported by the ucomiss/ucomisd instructions. Some inequalities need to be reversed, and eq and ne each require two separate tests.
2017-09-26 09:54:54 -07:00
parent 79968a2325
commit 7fb6159a85
9 changed files with 342 additions and 10 deletions
--- a/cranelift/filetests/isa/intel/binary32-float.cton
+++ b/cranelift/filetests/isa/intel/binary32-float.cton
@@ -157,6 +157,36 @@ ebb0:
    ; asm: movd 1032(%esp), %xmm2
    [-,%xmm2]           v211 = fill v201                        ; bin: 66 0f 6e 94 24 00000408
    ; Comparisons.
    ;
    ; Only `supported_floatccs` are tested here. Others are handled by
    ; legalization patterns.
    ; asm: ucomiss %xmm2, %xmm5
    ; asm: setnp %bl
    [-,%rbx]            v300 = fcmp ord v10, v11                ; bin: 0f 2e ea 0f 9b c3
    ; asm: ucomiss %xmm5, %xmm2
    ; asm: setp %bl
    [-,%rbx]            v301 = fcmp uno v11, v10                ; bin: 0f 2e d5 0f 9a c3
    ; asm: ucomiss %xmm2, %xmm5
    ; asm: setne %dl
    [-,%rdx]            v302 = fcmp one v10, v11                ; bin: 0f 2e ea 0f 95 c2
    ; asm: ucomiss %xmm5, %xmm2
    ; asm: sete %dl
    [-,%rdx]            v303 = fcmp ueq v11, v10                ; bin: 0f 2e d5 0f 94 c2
    ; asm: ucomiss %xmm2, %xmm5
    ; asm: seta %bl
    [-,%rbx]            v304 = fcmp gt v10, v11                 ; bin: 0f 2e ea 0f 97 c3
    ; asm: ucomiss %xmm5, %xmm2
    ; asm: setae %bl
    [-,%rbx]            v305 = fcmp ge v11, v10                 ; bin: 0f 2e d5 0f 93 c3
    ; asm: ucomiss %xmm2, %xmm5
    ; asm: setb %dl
    [-,%rdx]            v306 = fcmp ult v10, v11                ; bin: 0f 2e ea 0f 92 c2
    ; asm: ucomiss %xmm5, %xmm2
    ; asm: setbe %dl
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 0f 2e d5 0f 96 c2
    return
 }
@@ -302,5 +332,35 @@ ebb0:
    ; asm: movq 1032(%esp), %xmm2
    [-,%xmm2]           v211 = fill v201                        ; bin: f3 0f 7e 94 24 00000408
    ; Comparisons.
    ;
    ; Only `supported_floatccs` are tested here. Others are handled by
    ; legalization patterns.
    ; asm: ucomisd %xmm2, %xmm5
    ; asm: setnp %bl
    [-,%rbx]            v300 = fcmp ord v10, v11                ; bin: 66 0f 2e ea 0f 9b c3
    ; asm: ucomisd %xmm5, %xmm2
    ; asm: setp %bl
    [-,%rbx]            v301 = fcmp uno v11, v10                ; bin: 66 0f 2e d5 0f 9a c3
    ; asm: ucomisd %xmm2, %xmm5
    ; asm: setne %dl
    [-,%rdx]            v302 = fcmp one v10, v11                ; bin: 66 0f 2e ea 0f 95 c2
    ; asm: ucomisd %xmm5, %xmm2
    ; asm: sete %dl
    [-,%rdx]            v303 = fcmp ueq v11, v10                ; bin: 66 0f 2e d5 0f 94 c2
    ; asm: ucomisd %xmm2, %xmm5
    ; asm: seta %bl
    [-,%rbx]            v304 = fcmp gt v10, v11                 ; bin: 66 0f 2e ea 0f 97 c3
    ; asm: ucomisd %xmm5, %xmm2
    ; asm: setae %bl
    [-,%rbx]            v305 = fcmp ge v11, v10                 ; bin: 66 0f 2e d5 0f 93 c3
    ; asm: ucomisd %xmm2, %xmm5
    ; asm: setb %dl
    [-,%rdx]            v306 = fcmp ult v10, v11                ; bin: 66 0f 2e ea 0f 92 c2
    ; asm: ucomisd %xmm5, %xmm2
    ; asm: setbe %dl
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 66 0f 2e d5 0f 96 c2
    return
 }
--- a/cranelift/filetests/isa/intel/binary64-float.cton
+++ b/cranelift/filetests/isa/intel/binary64-float.cton
@@ -166,6 +166,36 @@ ebb0:
    ; asm: movd 1032(%rsp), %xmm10
    [-,%xmm10]          v211 = fill v201                        ; bin: 66 44 0f 6e 94 24 00000408
    ; Comparisons.
    ;
    ; Only `supported_floatccs` are tested here. Others are handled by
    ; legalization patterns.
    ; asm: ucomiss %xmm10, %xmm5
    ; asm: setnp %bl
    [-,%rbx]            v300 = fcmp ord v10, v11                ; bin: 41 0f 2e ea 0f 9b c3
    ; asm: ucomiss %xmm5, %xmm10
    ; asm: setp %bl
    [-,%rbx]            v301 = fcmp uno v11, v10                ; bin: 44 0f 2e d5 0f 9a c3
    ; asm: ucomiss %xmm10, %xmm5
    ; asm: setne %dl
    [-,%rdx]            v302 = fcmp one v10, v11                ; bin: 41 0f 2e ea 0f 95 c2
    ; asm: ucomiss %xmm5, %xmm10
    ; asm: sete %dl
    [-,%rdx]            v303 = fcmp ueq v11, v10                ; bin: 44 0f 2e d5 0f 94 c2
    ; asm: ucomiss %xmm10, %xmm5
    ; asm: seta %bl
    [-,%rbx]            v304 = fcmp gt v10, v11                 ; bin: 41 0f 2e ea 0f 97 c3
    ; asm: ucomiss %xmm5, %xmm10
    ; asm: setae %bl
    [-,%rbx]            v305 = fcmp ge v11, v10                 ; bin: 44 0f 2e d5 0f 93 c3
    ; asm: ucomiss %xmm10, %xmm5
    ; asm: setb %dl
    [-,%rdx]            v306 = fcmp ult v10, v11                ; bin: 41 0f 2e ea 0f 92 c2
    ; asm: ucomiss %xmm5, %xmm10
    ; asm: setbe %dl
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 44 0f 2e d5 0f 96 c2
    return
 }
@@ -326,5 +356,35 @@ ebb0:
    ; asm: movq 1032(%rsp), %xmm10
    [-,%xmm10]          v211 = fill v201                        ; bin: f3 44 0f 7e 94 24 00000408
    ; Comparisons.
    ;
    ; Only `supported_floatccs` are tested here. Others are handled by
    ; legalization patterns.
    ; asm: ucomisd %xmm10, %xmm5
    ; asm: setnp %bl
    [-,%rbx]            v300 = fcmp ord v10, v11                ; bin: 66 41 0f 2e ea 0f 9b c3
    ; asm: ucomisd %xmm5, %xmm10
    ; asm: setp %bl
    [-,%rbx]            v301 = fcmp uno v11, v10                ; bin: 66 44 0f 2e d5 0f 9a c3
    ; asm: ucomisd %xmm10, %xmm5
    ; asm: setne %dl
    [-,%rdx]            v302 = fcmp one v10, v11                ; bin: 66 41 0f 2e ea 0f 95 c2
    ; asm: ucomisd %xmm5, %xmm10
    ; asm: sete %dl
    [-,%rdx]            v303 = fcmp ueq v11, v10                ; bin: 66 44 0f 2e d5 0f 94 c2
    ; asm: ucomisd %xmm10, %xmm5
    ; asm: seta %bl
    [-,%rbx]            v304 = fcmp gt v10, v11                 ; bin: 66 41 0f 2e ea 0f 97 c3
    ; asm: ucomisd %xmm5, %xmm10
    ; asm: setae %bl
    [-,%rbx]            v305 = fcmp ge v11, v10                 ; bin: 66 44 0f 2e d5 0f 93 c3
    ; asm: ucomisd %xmm10, %xmm5
    ; asm: setb %dl
    [-,%rdx]            v306 = fcmp ult v10, v11                ; bin: 66 41 0f 2e ea 0f 92 c2
    ; asm: ucomisd %xmm5, %xmm10
    ; asm: setbe %dl
    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 66 44 0f 2e d5 0f 96 c2
    return
 }
--- a/cranelift/filetests/wasm/f32-compares.cton
+++ b/cranelift/filetests/wasm/f32-compares.cton
@@ -0,0 +1,50 @@
 ; Test code generation for WebAssembly f32 comparison operators.
 test compile
 set is_64bit=0
 isa intel haswell
 set is_64bit=1
 isa intel haswell
 function %f32_eq(f32, f32) -> i32 {
 ebb0(v0: f32, v1: f32):
    v2 = fcmp eq v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f32_ne(f32, f32) -> i32 {
 ebb0(v0: f32, v1: f32):
    v2 = fcmp ne v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f32_lt(f32, f32) -> i32 {
 ebb0(v0: f32, v1: f32):
    v2 = fcmp lt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f32_gt(f32, f32) -> i32 {
 ebb0(v0: f32, v1: f32):
    v2 = fcmp gt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f32_le(f32, f32) -> i32 {
 ebb0(v0: f32, v1: f32):
    v2 = fcmp le v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f32_ge(f32, f32) -> i32 {
 ebb0(v0: f32, v1: f32):
    v2 = fcmp ge v0, v1
    v3 = bint.i32 v2
    return v3
 }
--- a/cranelift/filetests/wasm/f64-compares.cton
+++ b/cranelift/filetests/wasm/f64-compares.cton
@@ -0,0 +1,50 @@
 ; Test code generation for WebAssembly f64 comparison operators.
 test compile
 set is_64bit=0
 isa intel haswell
 set is_64bit=1
 isa intel haswell
 function %f64_eq(f64, f64) -> i32 {
 ebb0(v0: f64, v1: f64):
    v2 = fcmp eq v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f64_ne(f64, f64) -> i32 {
 ebb0(v0: f64, v1: f64):
    v2 = fcmp ne v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f64_lt(f64, f64) -> i32 {
 ebb0(v0: f64, v1: f64):
    v2 = fcmp lt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f64_gt(f64, f64) -> i32 {
 ebb0(v0: f64, v1: f64):
    v2 = fcmp gt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f64_le(f64, f64) -> i32 {
 ebb0(v0: f64, v1: f64):
    v2 = fcmp le v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %f64_ge(f64, f64) -> i32 {
 ebb0(v0: f64, v1: f64):
    v2 = fcmp ge v0, v1
    v3 = bint.i32 v2
    return v3
 }
--- a/lib/cretonne/meta/gen_legalizer.py
+++ b/lib/cretonne/meta/gen_legalizer.py
@@ -169,10 +169,12 @@ def unwrap_inst(iref, node, fmt):
    iform = expr.inst.format
    nvops = iform.num_value_operands
-    # The tuple of locals we're extracting is `expr.args`.
+    # The tuple of locals to extract is the `Var` instances in `expr.args`.
    arg_names = tuple(
            arg.name if isinstance(arg, Var) else '_' for arg in expr.args)
    with fmt.indented(
            'let ({}, predicate) = if let ir::InstructionData::{} {{'
-            .format(', '.join(map(str, expr.args)), iform.name), '};'):
+            .format(', '.join(map(str, arg_names)), iform.name), '};'):
        # Fields are encoded directly.
        for f in iform.imm_fields:
            fmt.line('{},'.format(f.member))
--- a/lib/cretonne/meta/isa/intel/defs.py
+++ b/lib/cretonne/meta/isa/intel/defs.py
@@ -7,9 +7,22 @@ from __future__ import absolute_import
 from cdsl.isa import TargetISA, CPUMode
 import base.instructions
 from . import instructions as x86
 from base.immediates import floatcc
 ISA = TargetISA('intel', [base.instructions.GROUP, x86.GROUP])
 # CPU modes for 32-bit and 64-bit operation.
 I64 = CPUMode('I64', ISA)
 I32 = CPUMode('I32', ISA)
 # The set of floating point condition codes that are directly supported.
 # Other condition codes need to be reversed or expressed as two tests.
 supported_floatccs = [
        floatcc.ord,
        floatcc.uno,
        floatcc.one,
        floatcc.ueq,
        floatcc.gt,
        floatcc.ge,
        floatcc.ult,
        floatcc.ule]
--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -26,8 +26,8 @@ I32.legalize_type(
        default=narrow,
        b1=expand,
        i32=intel_expand,
-        f32=expand,
+        f32=intel_expand,
-        f64=expand)
+        f64=intel_expand)
 I64.legalize_monomorphic(expand)
 I64.legalize_type(
@@ -35,8 +35,8 @@ I64.legalize_type(
        b1=expand,
        i32=intel_expand,
        i64=intel_expand,
-        f32=expand,
+        f32=intel_expand,
-        f64=expand)
+        f64=intel_expand)
 #
@@ -106,6 +106,13 @@ for inst,           opc in [
        (base.bxor, 0x31)]:
    enc_i32_i64(inst, r.rr, opc)
 # Also add `b1` encodings for the logic instructions.
 # TODO: Should this be done with 8-bit instructions? It would improve
 # partial register dependencies.
 enc_flt(base.band.b1, r.rr, 0x21)
 enc_flt(base.bor.b1,  r.rr, 0x09)
 enc_flt(base.bxor.b1, r.rr, 0x31)
 enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf)
 enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7)
 enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)
@@ -391,3 +398,10 @@ for inst,               opc in [
        (base.bxor,     0x57)]:
    enc_flt(inst.f32, r.frm, 0x0f, opc)
    enc_flt(inst.f64, r.frm, 0x0f, opc)
 # Comparisons.
 #
 # This only covers the condition codes in `supported_floatccs`, the rest are
 # handled by legalization patterns.
 enc_flt(base.fcmp.f32, r.fcscc, 0x0f, 0x2e)
 enc_flt(base.fcmp.f64, r.fcscc, 0x66, 0x0f, 0x2e)
--- a/lib/cretonne/meta/isa/intel/legalize.py
+++ b/lib/cretonne/meta/isa/intel/legalize.py
@@ -4,7 +4,7 @@ Custom legalization patterns for Intel.
 from __future__ import absolute_import
 from cdsl.ast import Var
 from cdsl.xform import Rtl, XFormGroup
-from base.immediates import imm64
+from base.immediates import imm64, floatcc
 from base.types import i32, i64
 from base import legalize as shared
 from base import instructions as insts
@@ -25,6 +25,8 @@ dead = Var('dead')
 x = Var('x')
 xhi = Var('xhi')
 y = Var('y')
 a1 = Var('a1')
 a2 = Var('a2')
 #
 # Division and remainder.
@@ -56,3 +58,37 @@ for ty in [i32, i64]:
                xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)),
                (dead, a) << x86.sdivmodx(x, xhi, y)
            ))
 # Floating point condition codes.
 #
 # The 8 condition codes in `supported_floatccs` are directly supported by a
 # `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
 # patterns.
 # Equality needs an explicit `ord` test which checks the parity bit.
 intel_expand.legalize(
        a << insts.fcmp(floatcc.eq, x, y),
        Rtl(
            a1 << insts.fcmp(floatcc.ord, x, y),
            a2 << insts.fcmp(floatcc.ueq, x, y),
            a << insts.band(a1, a2)
        ))
 intel_expand.legalize(
        a << insts.fcmp(floatcc.ne, x, y),
        Rtl(
            a1 << insts.fcmp(floatcc.uno, x, y),
            a2 << insts.fcmp(floatcc.one, x, y),
            a << insts.bor(a1, a2)
        ))
 # Inequalities that need to be reversed.
 for cc,               rev_cc in [
        (floatcc.lt,  floatcc.gt),
        (floatcc.le,  floatcc.ge),
        (floatcc.ugt, floatcc.ult),
        (floatcc.uge, floatcc.ule)]:
    intel_expand.legalize(
            a << insts.fcmp(cc, x, y),
            Rtl(
                a << insts.fcmp(rev_cc, y, x)
            ))
--- a/lib/cretonne/meta/isa/intel/recipes.py
+++ b/lib/cretonne/meta/isa/intel/recipes.py
@@ -3,12 +3,13 @@ Intel Encoding recipes.
 """
 from __future__ import absolute_import
 from cdsl.isa import EncRecipe
-from cdsl.predicates import IsSignedInt, IsEqual
+from cdsl.predicates import IsSignedInt, IsEqual, Or
 from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry
 from base.formats import Trap, Call, IndirectCall, Store, Load
-from base.formats import IntCompare
+from base.formats import IntCompare, FloatCompare
 from base.formats import RegMove, Ternary, Jump, Branch, FuncAddr
 from .registers import GPR, ABCD, FPR, GPR8, FPR8, StackGPR32, StackFPR32
 from .defs import supported_floatccs
 try:
    from typing import Tuple, Dict, Sequence  # noqa
@@ -696,7 +697,7 @@ t8jccb_abcd = TailRecipe(
 # This bandaid macro doesn't support a REX prefix for the final `setCC`
 # instruction, so it is limited to the `ABCD` register class for booleans.
 icscc = TailRecipe(
-        'cscc', IntCompare, size=1 + 3, ins=(GPR, GPR), outs=ABCD,
+        'icscc', IntCompare, size=1 + 3, ins=(GPR, GPR), outs=ABCD,
        emit='''
        // Comparison instruction.
        PUT_OP(bits, rex2(in_reg0, in_reg1), sink);
@@ -719,3 +720,49 @@ icscc = TailRecipe(
        sink.put1(setcc);
        modrm_rr(out_reg0, 0, sink);
        ''')
 # Make a FloatCompare instruction predicate with the supported condition codes.
 # Same thing as `icscc`, but for floating point.
 #
 # The ucomiss/ucomisd instructions set the EFLAGS bits ZF/PF/CF like this:
 #
 #    ZPC OSA
 # UN 111 000
 # GT 000 000
 # LT 001 000
 # EQ 100 000
 #
 # Not all floating point condition codes are supported.
 fcscc = TailRecipe(
        'fcscc', FloatCompare, size=1 + 3, ins=(FPR, FPR), outs=ABCD,
        instp=Or(*(IsEqual(FloatCompare.cond, cc)
                   for cc in supported_floatccs)),
        emit='''
        // Comparison instruction.
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_rr(in_reg1, in_reg0, sink);
        // `setCC` instruction, no REX.
        use ir::condcodes::FloatCC::*;
        let setcc = match cond {
            Ordered                    => 0x9b, // EQ|LT|GT => setnp (P=0)
            Unordered                  => 0x9a, // UN       => setp  (P=1)
            OrderedNotEqual            => 0x95, // LT|GT    => setne (Z=0),
            UnorderedOrEqual           => 0x94, // UN|EQ    => sete  (Z=1)
            GreaterThan                => 0x97, // GT       => seta  (C=0&Z=0)
            GreaterThanOrEqual         => 0x93, // GT|EQ    => setae (C=0)
            UnorderedOrLessThan        => 0x92, // UN|LT    => setb  (C=1)
            UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1)
            Equal |                       // EQ
            NotEqual |                    // UN|LT|GT
            LessThan |                    // LT
            LessThanOrEqual |             // LT|EQ
            UnorderedOrGreaterThan |      // UN|GT
            UnorderedOrGreaterThanOrEqual // UN|GT|EQ
            => panic!("{} not supported by fcscc", cond),
        };
        sink.put1(0x0f);
        sink.put1(setcc);
        modrm_rr(out_reg0, 0, sink);
        ''')