Optimize 0.0 floating point constants. (#344)

* Optimize 0.0 floating point constants. Rather than using the existing process of emitting bit patterns and moving them into floating point registers, use the `xorps` instruction to zero out the register. * is_zero predicate function will not accept negative zero. Fixed formatting for encoding recipe and filetests.
2018-05-24 17:16:25 -04:00
parent 4afb28ef59
commit 191bab162b
6 changed files with 134 additions and 2 deletions
--- a/cranelift/filetests/isa/x86/optimized-zero-constants-32bit.cton
+++ b/cranelift/filetests/isa/x86/optimized-zero-constants-32bit.cton
@@ -0,0 +1,19 @@
 ; Check that floating-point constants equal to zero are optimized correctly.
 test binemit
 set is_64bit=0
 isa x86
 ; 32-bit mode, f32 zero constant. The expected machine code is the two-byte
 ; opcode 0f 57 followed by a register-to-register ModR/M byte (c0) naming
 ; xmm0 as both operands, i.e. the register is xor-ed with itself.
 function %foo() -> f32 fast {
 ebb0:
  ; asm: xorps %xmm0, %xmm0
  [-,%xmm0]    v0 = f32const 0.0     ; bin: 0f 57 c0
  return v0
 }
 ; 32-bit mode, f64 zero constant. The expected bytes are the same as for the
 ; f32 case with an additional leading 0x66 prefix, which selects the
 ; double-precision form of the instruction (xorpd).
 function %bar() -> f64 fast {
 ebb0:
  ; asm: xorpd %xmm0, %xmm0
  [-,%xmm0]    v1 = f64const 0.0     ; bin: 66 0f 57 c0
  return v1
 }
--- a/cranelift/filetests/isa/x86/optimized-zero-constants.cton
+++ b/cranelift/filetests/isa/x86/optimized-zero-constants.cton
@@ -0,0 +1,33 @@
 ; Check that floating-point constants equal to zero are optimized correctly.
 test binemit
 set is_64bit=1
 isa x86
 ; 64-bit mode, f32 zero constant in xmm0. "no_rex" refers to the register not
 ; needing any REX extension bits; the expected bytes still start with an
 ; empty REX prefix (0x40) ahead of the 0f 57 opcode.
 function %zero_const_32bit_no_rex() -> f32 fast {
 ebb0:
  ; asm: xorps %xmm0, %xmm0
  [-,%xmm0]    v0 = f32const 0.0     ; bin: 40 0f 57 c0
  return v0
 }
 ; 64-bit mode, f32 zero constant in xmm8. Because xmm8 is used as both
 ; operands, the REX prefix becomes 0x45 (REX.R and REX.B set) while the
 ; ModR/M byte stays c0.
 function %zero_const_32bit_rex() -> f32 fast {
 ebb0:
  ; asm: xorps %xmm8, %xmm8
  [-,%xmm8]    v1 = f32const 0.0     ; bin: 45 0f 57 c0
  return v1
 }
 ; 64-bit mode, f64 zero constant in xmm0. The 0x66 prefix is expected before
 ; the (empty) 0x40 REX prefix, followed by the 0f 57 opcode and ModR/M c0.
 function %zero_const_64bit_no_rex() -> f64 fast {
 ebb0:
  ; asm: xorpd %xmm0, %xmm0
  [-,%xmm0]    v0 = f64const 0.0     ; bin: 66 40 0f 57 c0
  return v0
 }
 ; 64-bit mode, f64 zero constant in xmm8. Same layout as the xmm0 case, but
 ; the REX prefix is 0x45 (REX.R and REX.B set) to address xmm8 for both
 ; operands.
 function %zero_const_64bit_rex() -> f64 fast {
 ebb0:
  ; asm: xorpd %xmm8, %xmm8
  [-,%xmm8]    v1 = f64const 0.0     ; bin: 66 45 0f 57 c0
  return v1
 }
--- a/lib/codegen/meta/cdsl/predicates.py
+++ b/lib/codegen/meta/cdsl/predicates.py
@@ -262,6 +262,38 @@ class IsEqual(FieldPredicate):
        self.value = value
 class IsZero32BitFloat(FieldPredicate):
    """
    Instruction predicate that holds when a 32-bit floating point immediate
    instruction format field is zero.

    The predicate carries no comparison value of its own. Its function name
    matches the `is_zero_32_bit_float` helper in
    `lib/codegen/src/predicates.rs`, which compares the raw bits with 0 and
    therefore does not accept negative zero.

    :param field: `FormatField` to be checked.
    """
    def __init__(self, field):
        # type: (FormatField) -> None
        predicate_fn = 'is_zero_32_bit_float'
        super(IsZero32BitFloat, self).__init__(field, predicate_fn, ())
 class IsZero64BitFloat(FieldPredicate):
    """
    Instruction predicate that holds when a 64-bit floating point immediate
    instruction format field is zero.

    The predicate carries no comparison value of its own. Its function name
    matches the `is_zero_64_bit_float` helper in
    `lib/codegen/src/predicates.rs`, which compares the raw bits with 0 and
    therefore does not accept negative zero.

    :param field: `FormatField` to be checked.
    """
    def __init__(self, field):
        # type: (FormatField) -> None
        predicate_fn = 'is_zero_64_bit_float'
        super(IsZero64BitFloat, self).__init__(field, predicate_fn, ())
 class IsSignedInt(FieldPredicate):
    """
    Instruction predicate that checks if an immediate instruction format field
--- a/lib/codegen/meta/isa/x86/encodings.py
+++ b/lib/codegen/meta/isa/x86/encodings.py
@@ -2,10 +2,12 @@
 x86 Encodings.
 """
 from __future__ import absolute_import
 from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
 from cdsl.predicates import IsUnsignedInt, Not, And
 from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
 from base import instructions as base
-from base.formats import UnaryImm, FuncAddr, Call, LoadComplex, StoreComplex
+from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm
 from base.formats import FuncAddr, Call, LoadComplex, StoreComplex
 from .defs import X86_64, X86_32
 from . import recipes as r
 from . import settings as cfg
@@ -604,6 +606,18 @@ X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))
 # Floating point
 #
 # Floating-point constants equal to 0.0 can be encoded using either
 # `xorps` or `xorpd`, for 32-bit and 64-bit floats respectively, instead of
 # moving a bit pattern into the register.
 #
 # Each encoding is guarded by an `IsZero*BitFloat` instruction predicate on
 # the format's `imm` field, so it is only offered for zero immediates.
 # The first pair registers the 32-bit ISA encodings directly; the second pair
 # goes through `enc_x86_64_instp` for the 64-bit ISA. The `xorpd` forms differ
 # from the `xorps` forms only by the leading 0x66 prefix byte.
 X86_32.enc(base.f32const, *r.f32imm_z(0x0f, 0x57),
           instp=IsZero32BitFloat(UnaryIeee32.imm))
 X86_32.enc(base.f64const, *r.f64imm_z(0x66, 0x0f, 0x57),
           instp=IsZero64BitFloat(UnaryIeee64.imm))
 enc_x86_64_instp(base.f32const, r.f32imm_z,
                 IsZero32BitFloat(UnaryIeee32.imm), 0x0f, 0x57)
 enc_x86_64_instp(base.f64const, r.f64imm_z,
                 IsZero64BitFloat(UnaryIeee64.imm), 0x66, 0x0f, 0x57)
 # movd
 enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
 enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
--- a/lib/codegen/meta/isa/x86/recipes.py
+++ b/lib/codegen/meta/isa/x86/recipes.py
@@ -4,8 +4,10 @@ x86 Encoding recipes.
 from __future__ import absolute_import
 from cdsl.isa import EncRecipe
 from cdsl.predicates import IsSignedInt, IsEqual, Or
 from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
 from cdsl.registers import RegClass
-from base.formats import Unary, UnaryImm, UnaryBool, Binary, BinaryImm
+from base.formats import Unary, UnaryIeee32, UnaryIeee64, UnaryImm, UnaryBool
 from base.formats import Binary, BinaryImm
 from base.formats import MultiAry, NullAry
 from base.formats import Trap, Call, CallIndirect, Store, Load
 from base.formats import IntCompare, IntCompareImm, FloatCompare
@@ -544,6 +546,24 @@ pu_iq = TailRecipe(
        sink.put8(imm as u64);
        ''')
 # XX /n Unary with floating point 32-bit immediate equal to zero.
 #
 # The recipe takes no input operands and produces one FPR result. The emitted
 # code passes the output register as both operands to `rex2` and `modrm_rr`,
 # so the instruction operates on the register with itself. The immediate is
 # never written to the sink; the `IsZero32BitFloat` predicate on the `imm`
 # field limits the recipe to zero constants.
 # NOTE(review): `size=1` presumably accounts for the single ModR/M byte
 # emitted after the opcode -- confirm against `TailRecipe`.
 f32imm_z = TailRecipe(
    'f32imm_z', UnaryIeee32, size=1, ins=(), outs=FPR,
    instp=IsZero32BitFloat(UnaryIeee32.imm),
    emit='''
        PUT_OP(bits, rex2(out_reg0, out_reg0), sink);
        modrm_rr(out_reg0, out_reg0, sink);
    ''')
 # XX /n Unary with floating point 64-bit immediate equal to zero.
 #
 # The recipe takes no input operands and produces one FPR result. The emitted
 # code passes the output register as both operands to `rex2` and `modrm_rr`,
 # so the instruction operates on the register with itself. The immediate is
 # never written to the sink; the `IsZero64BitFloat` predicate on the `imm`
 # field limits the recipe to zero constants.
 # NOTE(review): `size=1` presumably accounts for the single ModR/M byte
 # emitted after the opcode -- confirm against `TailRecipe`.
 f64imm_z = TailRecipe(
    'f64imm_z', UnaryIeee64, size=1, ins=(), outs=FPR,
    instp=IsZero64BitFloat(UnaryIeee64.imm),
    emit='''
        PUT_OP(bits, rex2(out_reg0, out_reg0), sink);
        modrm_rr(out_reg0, out_reg0, sink);
    ''')
 pushq = TailRecipe(
    'pushq', Unary, size=0, ins=GPR, outs=(),
    emit='''
--- a/lib/codegen/src/predicates.rs
+++ b/lib/codegen/src/predicates.rs
@@ -11,6 +11,20 @@
 use ir;
 /// Check that a 64-bit floating point value is zero.
 #[allow(dead_code)]
 pub fn is_zero_64_bit_float<T: Into<ir::immediates::Ieee64>>(x: T) -> bool {
    let x64 = x.into();
    x64.bits() == 0
 }
 /// Check that a 32-bit floating point value is zero.
 #[allow(dead_code)]
 pub fn is_zero_32_bit_float<T: Into<ir::immediates::Ieee32>>(x: T) -> bool {
    let x32 = x.into();
    x32.bits() == 0
 }
 /// Check that `x` is the same as `y`.
 #[allow(dead_code)]
 pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {