Optimize 0.0 floating point constants. (#344)
* Optimize 0.0 floating point constants. Rather than using the existing process of emitting bit patterns and moving them into floating point registers, use the `xorps` (f32) or `xorpd` (f64) instruction to zero out the register.
* The is_zero predicate functions do not accept negative zero. Fixed formatting for the encoding recipes and filetests.
This commit is contained in:
@@ -0,0 +1,19 @@
|
|||||||
|
; Check that floating-point constants equal to zero are optimized correctly.
|
||||||
|
test binemit
|
||||||
|
set is_64bit=0
|
||||||
|
isa x86
|
||||||
|
|
||||||
|
; f32 zero constant on the 32-bit ISA (is_64bit=0): the expected encoding is
; `xorps` of the destination register with itself, with no prefix bytes.
function %foo() -> f32 fast {
ebb0:
    ; asm: xorps %xmm0, %xmm0
    [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0
    return v0
}
|
||||||
|
|
||||||
|
; f64 zero constant on the 32-bit ISA (is_64bit=0): the expected encoding is
; `xorpd`, i.e. the same opcode bytes as `xorps` preceded by the 0x66 prefix.
function %bar() -> f64 fast {
ebb0:
    ; asm: xorpd %xmm0, %xmm0
    [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0
    return v1
}
|
||||||
|
|
||||||
33
cranelift/filetests/isa/x86/optimized-zero-constants.cton
Normal file
33
cranelift/filetests/isa/x86/optimized-zero-constants.cton
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
; Check that floating-point constants equal to zero are optimized correctly.
|
||||||
|
test binemit
|
||||||
|
set is_64bit=1
|
||||||
|
isa x86
|
||||||
|
|
||||||
|
; f32 zero constant in %xmm0 on the 64-bit ISA (is_64bit=1).
; NOTE(review): despite the `no_rex` name, the expected bytes start with 0x40,
; which is a REX prefix with no extension bits set -- confirm this is the
; intended encoding rather than the prefix-free `0f 57 c0` form.
function %zero_const_32bit_no_rex() -> f32 fast {
ebb0:
    ; asm: xorps %xmm0, %xmm0
    [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0
    return v0
}
|
||||||
|
|
||||||
|
; f32 zero constant in %xmm8 on the 64-bit ISA (is_64bit=1).
; %xmm8 appears in both ModR/M operand positions, so the expected REX prefix
; is 0x45 (REX.R and REX.B set) while the ModR/M byte stays 0xc0.
function %zero_const_32bit_rex() -> f32 fast {
ebb0:
    ; asm: xorps %xmm8, %xmm8
    [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0
    return v1
}
|
||||||
|
|
||||||
|
; f64 zero constant in %xmm0 on the 64-bit ISA (is_64bit=1): `xorpd` with the
; 0x66 prefix placed before the REX byte.
; NOTE(review): despite the `no_rex` name, the expected bytes contain 0x40,
; which is a REX prefix with no extension bits set -- confirm this is the
; intended encoding rather than the REX-free `66 0f 57 c0` form.
function %zero_const_64bit_no_rex() -> f64 fast {
ebb0:
    ; asm: xorpd %xmm0, %xmm0
    [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0
    return v0
}
|
||||||
|
|
||||||
|
; f64 zero constant in %xmm8 on the 64-bit ISA (is_64bit=1): `xorpd` with the
; 0x66 prefix, followed by REX 0x45 (REX.R and REX.B set) because %xmm8 fills
; both ModR/M operand positions.
function %zero_const_64bit_rex() -> f64 fast {
ebb0:
    ; asm: xorpd %xmm8, %xmm8
    [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0
    return v1
}
|
||||||
|
|
||||||
@@ -262,6 +262,38 @@ class IsEqual(FieldPredicate):
|
|||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
|
|
||||||
|
class IsZero32BitFloat(FieldPredicate):
    """
    Instruction predicate that checks if a 32-bit floating point immediate
    instruction format field is equal to zero.

    The predicate is registered under the function name
    `is_zero_32_bit_float` with an empty argument tuple: unlike `IsEqual`,
    there is no constant to compare against, so only the field is supplied.

    :param field: `FormatField` to be checked.
    """

    def __init__(self, field):
        # type: (FormatField) -> None
        super(IsZero32BitFloat, self).__init__(
            field, 'is_zero_32_bit_float', ())
|
||||||
|
|
||||||
|
|
||||||
|
class IsZero64BitFloat(FieldPredicate):
    """
    Instruction predicate that checks if a 64-bit floating point immediate
    instruction format field is equal to zero.

    The predicate is registered under the function name
    `is_zero_64_bit_float` with an empty argument tuple: unlike `IsEqual`,
    there is no constant to compare against, so only the field is supplied.

    :param field: `FormatField` to be checked.
    """

    def __init__(self, field):
        # type: (FormatField) -> None
        super(IsZero64BitFloat, self).__init__(
            field, 'is_zero_64_bit_float', ())
|
||||||
|
|
||||||
|
|
||||||
class IsSignedInt(FieldPredicate):
|
class IsSignedInt(FieldPredicate):
|
||||||
"""
|
"""
|
||||||
Instruction predicate that checks if an immediate instruction format field
|
Instruction predicate that checks if an immediate instruction format field
|
||||||
|
|||||||
@@ -2,10 +2,12 @@
|
|||||||
x86 Encodings.
|
x86 Encodings.
|
||||||
"""
|
"""
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
|
||||||
from cdsl.predicates import IsUnsignedInt, Not, And
|
from cdsl.predicates import IsUnsignedInt, Not, And
|
||||||
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
|
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
|
||||||
from base import instructions as base
|
from base import instructions as base
|
||||||
from base.formats import UnaryImm, FuncAddr, Call, LoadComplex, StoreComplex
|
from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm
|
||||||
|
from base.formats import FuncAddr, Call, LoadComplex, StoreComplex
|
||||||
from .defs import X86_64, X86_32
|
from .defs import X86_64, X86_32
|
||||||
from . import recipes as r
|
from . import recipes as r
|
||||||
from . import settings as cfg
|
from . import settings as cfg
|
||||||
@@ -604,6 +606,18 @@ X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))
|
|||||||
# Floating point
|
# Floating point
|
||||||
#
|
#
|
||||||
|
|
||||||
|
# Floating-point constants equal to 0.0 are encoded by XORing the destination
# register with itself: `xorps` (0f 57) for `f32const` and `xorpd`
# (66 0f 57) for `f64const`. Each encoding is guarded by the matching
# IsZero*BitFloat instruction predicate on the immediate field.
#
# 32-bit ISA.
X86_32.enc(base.f32const, *r.f32imm_z(0x0f, 0x57),
           instp=IsZero32BitFloat(UnaryIeee32.imm))
X86_32.enc(base.f64const, *r.f64imm_z(0x66, 0x0f, 0x57),
           instp=IsZero64BitFloat(UnaryIeee64.imm))

# 64-bit ISA, registered through the `enc_x86_64_instp` helper.
enc_x86_64_instp(base.f32const, r.f32imm_z,
                 IsZero32BitFloat(UnaryIeee32.imm), 0x0f, 0x57)
enc_x86_64_instp(base.f64const, r.f64imm_z,
                 IsZero64BitFloat(UnaryIeee64.imm), 0x66, 0x0f, 0x57)
|
||||||
|
|
||||||
# movd
|
# movd
|
||||||
enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
|
enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
|
||||||
enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
|
enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
|
||||||
|
|||||||
@@ -4,8 +4,10 @@ x86 Encoding recipes.
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from cdsl.isa import EncRecipe
|
from cdsl.isa import EncRecipe
|
||||||
from cdsl.predicates import IsSignedInt, IsEqual, Or
|
from cdsl.predicates import IsSignedInt, IsEqual, Or
|
||||||
|
from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
|
||||||
from cdsl.registers import RegClass
|
from cdsl.registers import RegClass
|
||||||
from base.formats import Unary, UnaryImm, UnaryBool, Binary, BinaryImm
|
from base.formats import Unary, UnaryIeee32, UnaryIeee64, UnaryImm, UnaryBool
|
||||||
|
from base.formats import Binary, BinaryImm
|
||||||
from base.formats import MultiAry, NullAry
|
from base.formats import MultiAry, NullAry
|
||||||
from base.formats import Trap, Call, CallIndirect, Store, Load
|
from base.formats import Trap, Call, CallIndirect, Store, Load
|
||||||
from base.formats import IntCompare, IntCompareImm, FloatCompare
|
from base.formats import IntCompare, IntCompareImm, FloatCompare
|
||||||
@@ -544,6 +546,24 @@ pu_iq = TailRecipe(
|
|||||||
sink.put8(imm as u64);
|
sink.put8(imm as u64);
|
||||||
''')
|
''')
|
||||||
|
|
||||||
|
# XX /n Unary with floating point 32-bit immediate equal to zero.
#
# Takes no inputs and defines one FPR output. The emitted ModR/M byte names
# the output register in both operand positions, so the opcode operates on
# the register with itself. The `instp` predicate limits the recipe to
# instructions whose immediate satisfies IsZero32BitFloat.
f32imm_z = TailRecipe(
        'f32imm_z', UnaryIeee32, size=1, ins=(), outs=FPR,
        instp=IsZero32BitFloat(UnaryIeee32.imm),
        emit='''
        PUT_OP(bits, rex2(out_reg0, out_reg0), sink);
        modrm_rr(out_reg0, out_reg0, sink);
        ''')
|
||||||
|
|
||||||
|
# XX /n Unary with floating point 64-bit immediate equal to zero.
#
# Takes no inputs and defines one FPR output. The emitted ModR/M byte names
# the output register in both operand positions, so the opcode operates on
# the register with itself. The `instp` predicate limits the recipe to
# instructions whose immediate satisfies IsZero64BitFloat.
f64imm_z = TailRecipe(
        'f64imm_z', UnaryIeee64, size=1, ins=(), outs=FPR,
        instp=IsZero64BitFloat(UnaryIeee64.imm),
        emit='''
        PUT_OP(bits, rex2(out_reg0, out_reg0), sink);
        modrm_rr(out_reg0, out_reg0, sink);
        ''')
|
||||||
|
|
||||||
pushq = TailRecipe(
|
pushq = TailRecipe(
|
||||||
'pushq', Unary, size=0, ins=GPR, outs=(),
|
'pushq', Unary, size=0, ins=GPR, outs=(),
|
||||||
emit='''
|
emit='''
|
||||||
|
|||||||
@@ -11,6 +11,20 @@
|
|||||||
|
|
||||||
use ir;
|
use ir;
|
||||||
|
|
||||||
|
/// Check that a 64-bit floating point value is zero.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn is_zero_64_bit_float<T: Into<ir::immediates::Ieee64>>(x: T) -> bool {
|
||||||
|
let x64 = x.into();
|
||||||
|
x64.bits() == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check that a 32-bit floating point value is zero.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn is_zero_32_bit_float<T: Into<ir::immediates::Ieee32>>(x: T) -> bool {
|
||||||
|
let x32 = x.into();
|
||||||
|
x32.bits() == 0
|
||||||
|
}
|
||||||
|
|
||||||
/// Check that `x` is the same as `y`.
|
/// Check that `x` is the same as `y`.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
|
pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
|
||||||
|
|||||||
Reference in New Issue
Block a user