Optimize 0.0 floating point constants. (#344)

* Optimize 0.0 floating point constants. Rather than using the existing process of emitting bit patterns and moving them into floating point registers, use the `xorps` (f32) or `xorpd` (f64) instruction to zero out the register. * The is_zero predicate functions do not accept negative zero. Fixed formatting for the encoding recipes and filetests.
2018-05-24 17:16:25 -04:00
parent 4afb28ef59
commit 191bab162b
6 changed files with 134 additions and 2 deletions
--- a/cranelift/filetests/isa/x86/optimized-zero-constants-32bit.cton
+++ b/cranelift/filetests/isa/x86/optimized-zero-constants-32bit.cton
@@ -0,0 +1,19 @@
+; Check that f32const/f64const 0.0 are encoded as xorps/xorpd on 32-bit x86.
+test binemit
+set is_64bit=0
+isa x86
+
+function %foo() -> f32 fast {
+ebb0:
+  ; asm: xorps %xmm0, %xmm0
+  [-,%xmm0]    v0 = f32const 0.0     ; bin: 0f 57 c0
+  return v0
+}
+
+function %bar() -> f64 fast {
+ebb0:
+  ; asm: xorpd %xmm0, %xmm0
+  [-,%xmm0]    v1 = f64const 0.0     ; bin: 66 0f 57 c0
+  return v1
+}
+
--- a/cranelift/filetests/isa/x86/optimized-zero-constants.cton
+++ b/cranelift/filetests/isa/x86/optimized-zero-constants.cton
@@ -0,0 +1,33 @@
+; Check that f32const/f64const 0.0 are encoded as xorps/xorpd on 64-bit x86, for both %xmm0 and %xmm8.
+test binemit
+set is_64bit=1
+isa x86
+
+function %zero_const_32bit_no_rex() -> f32 fast {
+ebb0:
+  ; asm: xorps %xmm0, %xmm0
+  [-,%xmm0]    v0 = f32const 0.0     ; bin: 40 0f 57 c0
+  return v0
+}
+
+function %zero_const_32bit_rex() -> f32 fast {
+ebb0:
+  ; asm: xorps %xmm8, %xmm8
+  [-,%xmm8]    v1 = f32const 0.0     ; bin: 45 0f 57 c0
+  return v1
+}
+
+function %zero_const_64bit_no_rex() -> f64 fast {
+ebb0:
+  ; asm: xorpd %xmm0, %xmm0
+  [-,%xmm0]    v0 = f64const 0.0     ; bin: 66 40 0f 57 c0
+  return v0
+}
+
+function %zero_const_64bit_rex() -> f64 fast {
+ebb0:
+  ; asm: xorpd %xmm8, %xmm8
+  [-,%xmm8]    v1 = f64const 0.0     ; bin: 66 45 0f 57 c0
+  return v1
+}
+
--- a/lib/codegen/meta/cdsl/predicates.py
+++ b/lib/codegen/meta/cdsl/predicates.py
@@ -262,6 +262,38 @@ class IsEqual(FieldPredicate):
        self.value = value


+class IsZero32BitFloat(FieldPredicate):
+    """
+    Instruction predicate that checks if a 32-bit floating point immediate
+    instruction format field is positive zero; negative zero does not match.
+
+    :param field: `FormatField` to be checked. The predicate takes no
+        constant value argument.
+    """
+
+    def __init__(self, field):
+        # type: (FormatField) -> None
+        super(IsZero32BitFloat, self).__init__(field,
+                                               'is_zero_32_bit_float',
+                                               ())
+
+
+class IsZero64BitFloat(FieldPredicate):
+    """
+    Instruction predicate that checks if a 64-bit floating point immediate
+    instruction format field is positive zero; negative zero does not match.
+
+    :param field: `FormatField` to be checked. The predicate takes no
+        constant value argument.
+    """
+
+    def __init__(self, field):
+        # type: (FormatField) -> None
+        super(IsZero64BitFloat, self).__init__(field,
+                                               'is_zero_64_bit_float',
+                                               ())
+
+
 class IsSignedInt(FieldPredicate):
    """
    Instruction predicate that checks if an immediate instruction format field
--- a/lib/codegen/meta/isa/x86/encodings.py
+++ b/lib/codegen/meta/isa/x86/encodings.py
@@ -2,10 +2,12 @@
 x86 Encodings.
 """
 from __future__ import absolute_import
+from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
 from cdsl.predicates import IsUnsignedInt, Not, And
 from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
 from base import instructions as base
-from base.formats import UnaryImm, FuncAddr, Call, LoadComplex, StoreComplex
+from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm
+from base.formats import FuncAddr, Call, LoadComplex, StoreComplex
 from .defs import X86_64, X86_32
 from . import recipes as r
 from . import settings as cfg
@@ -604,6 +606,18 @@ X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))
 # Floating point
 #

+# Floating-point constants equal to +0.0 are encoded as a self-XOR of the
+# destination register: `xorps` for f32 and `xorpd` for f64.
+X86_32.enc(base.f32const, *r.f32imm_z(0x0f, 0x57),
+           instp=IsZero32BitFloat(UnaryIeee32.imm))
+X86_32.enc(base.f64const, *r.f64imm_z(0x66, 0x0f, 0x57),
+           instp=IsZero64BitFloat(UnaryIeee64.imm))
+
+enc_x86_64_instp(base.f32const, r.f32imm_z,
+                 IsZero32BitFloat(UnaryIeee32.imm), 0x0f, 0x57)
+enc_x86_64_instp(base.f64const, r.f64imm_z,
+                 IsZero64BitFloat(UnaryIeee64.imm), 0x66, 0x0f, 0x57)
+
 # movd
 enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
 enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
--- a/lib/codegen/meta/isa/x86/recipes.py
+++ b/lib/codegen/meta/isa/x86/recipes.py
@@ -4,8 +4,10 @@ x86 Encoding recipes.
 from __future__ import absolute_import
 from cdsl.isa import EncRecipe
 from cdsl.predicates import IsSignedInt, IsEqual, Or
+from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
 from cdsl.registers import RegClass
-from base.formats import Unary, UnaryImm, UnaryBool, Binary, BinaryImm
+from base.formats import Unary, UnaryIeee32, UnaryIeee64, UnaryImm, UnaryBool
+from base.formats import Binary, BinaryImm
 from base.formats import MultiAry, NullAry
 from base.formats import Trap, Call, CallIndirect, Store, Load
 from base.formats import IntCompare, IntCompareImm, FloatCompare
@@ -544,6 +546,24 @@ pu_iq = TailRecipe(
        sink.put8(imm as u64);
        ''')

+# XX /r Unary with 32-bit float immediate equal to zero (reg = rm = out_reg0).
+f32imm_z = TailRecipe(
+    'f32imm_z', UnaryIeee32, size=1, ins=(), outs=FPR,
+    instp=IsZero32BitFloat(UnaryIeee32.imm),
+    emit='''
+        PUT_OP(bits, rex2(out_reg0, out_reg0), sink);
+        modrm_rr(out_reg0, out_reg0, sink);
+    ''')
+
+# XX /r Unary with 64-bit float immediate equal to zero (reg = rm = out_reg0).
+f64imm_z = TailRecipe(
+    'f64imm_z', UnaryIeee64, size=1, ins=(), outs=FPR,
+    instp=IsZero64BitFloat(UnaryIeee64.imm),
+    emit='''
+        PUT_OP(bits, rex2(out_reg0, out_reg0), sink);
+        modrm_rr(out_reg0, out_reg0, sink);
+    ''')
+
 pushq = TailRecipe(
    'pushq', Unary, size=0, ins=GPR, outs=(),
    emit='''
--- a/lib/codegen/src/predicates.rs
+++ b/lib/codegen/src/predicates.rs
@@ -11,6 +11,20 @@

 use ir;

+/// Check that a 64-bit floating point value is positive zero (all bits clear); `-0.0` fails.
+#[allow(dead_code)]
+pub fn is_zero_64_bit_float<T: Into<ir::immediates::Ieee64>>(x: T) -> bool {
+    let x64 = x.into();
+    x64.bits() == 0
+}
+
+/// Check that a 32-bit floating point value is positive zero (all bits clear); `-0.0` fails.
+#[allow(dead_code)]
+pub fn is_zero_32_bit_float<T: Into<ir::immediates::Ieee32>>(x: T) -> bool {
+    let x32 = x.into();
+    x32.bits() == 0
+}
+
 /// Check that `x` is the same as `y`.
 #[allow(dead_code)]
 pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {