From 7fb6159a85ac4ca0c424bc47850b5c1a5563d97e Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 26 Sep 2017 09:54:54 -0700 Subject: [PATCH] Add Intel encodings for the fcmp instruction. Not all floating point condition codes are directly supported by the ucomiss/ucomisd instructions. Some inequalities need to be reversed and eq+ne require two separate tests. --- .../filetests/isa/intel/binary32-float.cton | 60 +++++++++++++++++++ .../filetests/isa/intel/binary64-float.cton | 60 +++++++++++++++++++ cranelift/filetests/wasm/f32-compares.cton | 50 ++++++++++++++++ cranelift/filetests/wasm/f64-compares.cton | 50 ++++++++++++++++ lib/cretonne/meta/gen_legalizer.py | 6 +- lib/cretonne/meta/isa/intel/defs.py | 13 ++++ lib/cretonne/meta/isa/intel/encodings.py | 22 +++++-- lib/cretonne/meta/isa/intel/legalize.py | 38 +++++++++++- lib/cretonne/meta/isa/intel/recipes.py | 53 +++++++++++++++- 9 files changed, 342 insertions(+), 10 deletions(-) create mode 100644 cranelift/filetests/wasm/f32-compares.cton create mode 100644 cranelift/filetests/wasm/f64-compares.cton diff --git a/cranelift/filetests/isa/intel/binary32-float.cton b/cranelift/filetests/isa/intel/binary32-float.cton index 4fa083b3de..b0a2c83c8f 100644 --- a/cranelift/filetests/isa/intel/binary32-float.cton +++ b/cranelift/filetests/isa/intel/binary32-float.cton @@ -157,6 +157,36 @@ ebb0: ; asm: movd 1032(%esp), %xmm2 [-,%xmm2] v211 = fill v201 ; bin: 66 0f 6e 94 24 00000408 + ; Comparisons. + ; + ; Only `supported_floatccs` are tested here. Others are handled by + ; legalization patterns. 
+ + ; asm: ucomiss %xmm2, %xmm5 + ; asm: setnp %bl + [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 0f 2e ea 0f 9b c3 + ; asm: ucomiss %xmm5, %xmm2 + ; asm: setp %bl + [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 0f 2e d5 0f 9a c3 + ; asm: ucomiss %xmm2, %xmm5 + ; asm: setne %dl + [-,%rdx] v302 = fcmp one v10, v11 ; bin: 0f 2e ea 0f 95 c2 + ; asm: ucomiss %xmm5, %xmm2 + ; asm: sete %dl + [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 0f 2e d5 0f 94 c2 + ; asm: ucomiss %xmm2, %xmm5 + ; asm: seta %bl + [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 0f 2e ea 0f 97 c3 + ; asm: ucomiss %xmm5, %xmm2 + ; asm: setae %bl + [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 0f 2e d5 0f 93 c3 + ; asm: ucomiss %xmm2, %xmm5 + ; asm: setb %dl + [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 0f 2e ea 0f 92 c2 + ; asm: ucomiss %xmm5, %xmm2 + ; asm: setbe %dl + [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 0f 2e d5 0f 96 c2 + return } @@ -302,5 +332,35 @@ ebb0: ; asm: movq 1032(%esp), %xmm2 [-,%xmm2] v211 = fill v201 ; bin: f3 0f 7e 94 24 00000408 + ; Comparisons. + ; + ; Only `supported_floatccs` are tested here. Others are handled by + ; legalization patterns. 
+ + ; asm: ucomisd %xmm2, %xmm5 + ; asm: setnp %bl + [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 0f 2e ea 0f 9b c3 + ; asm: ucomisd %xmm5, %xmm2 + ; asm: setp %bl + [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 0f 2e d5 0f 9a c3 + ; asm: ucomisd %xmm2, %xmm5 + ; asm: setne %dl + [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 0f 2e ea 0f 95 c2 + ; asm: ucomisd %xmm5, %xmm2 + ; asm: sete %dl + [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 0f 2e d5 0f 94 c2 + ; asm: ucomisd %xmm2, %xmm5 + ; asm: seta %bl + [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 0f 2e ea 0f 97 c3 + ; asm: ucomisd %xmm5, %xmm2 + ; asm: setae %bl + [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 0f 2e d5 0f 93 c3 + ; asm: ucomisd %xmm2, %xmm5 + ; asm: setb %dl + [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 0f 2e ea 0f 92 c2 + ; asm: ucomisd %xmm5, %xmm2 + ; asm: setbe %dl + [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 0f 2e d5 0f 96 c2 + return } diff --git a/cranelift/filetests/isa/intel/binary64-float.cton b/cranelift/filetests/isa/intel/binary64-float.cton index c8d4df232f..542a712f26 100644 --- a/cranelift/filetests/isa/intel/binary64-float.cton +++ b/cranelift/filetests/isa/intel/binary64-float.cton @@ -166,6 +166,36 @@ ebb0: ; asm: movd 1032(%rsp), %xmm10 [-,%xmm10] v211 = fill v201 ; bin: 66 44 0f 6e 94 24 00000408 + ; Comparisons. + ; + ; Only `supported_floatccs` are tested here. Others are handled by + ; legalization patterns. 
+ + ; asm: ucomiss %xmm10, %xmm5 + ; asm: setnp %bl + [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 41 0f 2e ea 0f 9b c3 + ; asm: ucomiss %xmm5, %xmm10 + ; asm: setp %bl + [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 44 0f 2e d5 0f 9a c3 + ; asm: ucomiss %xmm10, %xmm5 + ; asm: setne %dl + [-,%rdx] v302 = fcmp one v10, v11 ; bin: 41 0f 2e ea 0f 95 c2 + ; asm: ucomiss %xmm5, %xmm10 + ; asm: sete %dl + [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 44 0f 2e d5 0f 94 c2 + ; asm: ucomiss %xmm10, %xmm5 + ; asm: seta %bl + [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 41 0f 2e ea 0f 97 c3 + ; asm: ucomiss %xmm5, %xmm10 + ; asm: setae %bl + [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 44 0f 2e d5 0f 93 c3 + ; asm: ucomiss %xmm10, %xmm5 + ; asm: setb %dl + [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 41 0f 2e ea 0f 92 c2 + ; asm: ucomiss %xmm5, %xmm10 + ; asm: setbe %dl + [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2 + return } @@ -326,5 +356,35 @@ ebb0: ; asm: movq 1032(%rsp), %xmm10 [-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 7e 94 24 00000408 + ; Comparisons. + ; + ; Only `supported_floatccs` are tested here. Others are handled by + ; legalization patterns. 
+ + ; asm: ucomisd %xmm10, %xmm5 + ; asm: setnp %bl + [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 41 0f 2e ea 0f 9b c3 + ; asm: ucomisd %xmm5, %xmm10 + ; asm: setp %bl + [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 44 0f 2e d5 0f 9a c3 + ; asm: ucomisd %xmm10, %xmm5 + ; asm: setne %dl + [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 41 0f 2e ea 0f 95 c2 + ; asm: ucomisd %xmm5, %xmm10 + ; asm: sete %dl + [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 44 0f 2e d5 0f 94 c2 + ; asm: ucomisd %xmm10, %xmm5 + ; asm: seta %bl + [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 41 0f 2e ea 0f 97 c3 + ; asm: ucomisd %xmm5, %xmm10 + ; asm: setae %bl + [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 44 0f 2e d5 0f 93 c3 + ; asm: ucomisd %xmm10, %xmm5 + ; asm: setb %dl + [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 41 0f 2e ea 0f 92 c2 + ; asm: ucomisd %xmm5, %xmm10 + ; asm: setbe %dl + [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2 + return } diff --git a/cranelift/filetests/wasm/f32-compares.cton b/cranelift/filetests/wasm/f32-compares.cton new file mode 100644 index 0000000000..560b86ebcb --- /dev/null +++ b/cranelift/filetests/wasm/f32-compares.cton @@ -0,0 +1,50 @@ +; Test code generation for WebAssembly f32 comparison operators. 
+test compile + +set is_64bit=0 +isa intel haswell + +set is_64bit=1 +isa intel haswell + +function %f32_eq(f32, f32) -> i32 { +ebb0(v0: f32, v1: f32): + v2 = fcmp eq v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_ne(f32, f32) -> i32 { +ebb0(v0: f32, v1: f32): + v2 = fcmp ne v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_lt(f32, f32) -> i32 { +ebb0(v0: f32, v1: f32): + v2 = fcmp lt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_gt(f32, f32) -> i32 { +ebb0(v0: f32, v1: f32): + v2 = fcmp gt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_le(f32, f32) -> i32 { +ebb0(v0: f32, v1: f32): + v2 = fcmp le v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_ge(f32, f32) -> i32 { +ebb0(v0: f32, v1: f32): + v2 = fcmp ge v0, v1 + v3 = bint.i32 v2 + return v3 +} diff --git a/cranelift/filetests/wasm/f64-compares.cton b/cranelift/filetests/wasm/f64-compares.cton new file mode 100644 index 0000000000..78a260ef27 --- /dev/null +++ b/cranelift/filetests/wasm/f64-compares.cton @@ -0,0 +1,50 @@ +; Test code generation for WebAssembly f64 comparison operators. 
+test compile + +set is_64bit=0 +isa intel haswell + +set is_64bit=1 +isa intel haswell + +function %f64_eq(f64, f64) -> i32 { +ebb0(v0: f64, v1: f64): + v2 = fcmp eq v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_ne(f64, f64) -> i32 { +ebb0(v0: f64, v1: f64): + v2 = fcmp ne v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_lt(f64, f64) -> i32 { +ebb0(v0: f64, v1: f64): + v2 = fcmp lt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_gt(f64, f64) -> i32 { +ebb0(v0: f64, v1: f64): + v2 = fcmp gt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_le(f64, f64) -> i32 { +ebb0(v0: f64, v1: f64): + v2 = fcmp le v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_ge(f64, f64) -> i32 { +ebb0(v0: f64, v1: f64): + v2 = fcmp ge v0, v1 + v3 = bint.i32 v2 + return v3 +} diff --git a/lib/cretonne/meta/gen_legalizer.py b/lib/cretonne/meta/gen_legalizer.py index 0d8407bb30..1b39918658 100644 --- a/lib/cretonne/meta/gen_legalizer.py +++ b/lib/cretonne/meta/gen_legalizer.py @@ -169,10 +169,12 @@ def unwrap_inst(iref, node, fmt): iform = expr.inst.format nvops = iform.num_value_operands - # The tuple of locals we're extracting is `expr.args`. + # The tuple of locals to extract is the `Var` instances in `expr.args`. + arg_names = tuple( + arg.name if isinstance(arg, Var) else '_' for arg in expr.args) with fmt.indented( 'let ({}, predicate) = if let ir::InstructionData::{} {{' - .format(', '.join(map(str, expr.args)), iform.name), '};'): + .format(', '.join(map(str, arg_names)), iform.name), '};'): # Fields are encoded directly. for f in iform.imm_fields: fmt.line('{},'.format(f.member)) diff --git a/lib/cretonne/meta/isa/intel/defs.py b/lib/cretonne/meta/isa/intel/defs.py index ad13741ebc..d5bb0b5a1f 100644 --- a/lib/cretonne/meta/isa/intel/defs.py +++ b/lib/cretonne/meta/isa/intel/defs.py @@ -7,9 +7,22 @@ from __future__ import absolute_import from cdsl.isa import TargetISA, CPUMode import base.instructions from . 
import instructions as x86 +from base.immediates import floatcc ISA = TargetISA('intel', [base.instructions.GROUP, x86.GROUP]) # CPU modes for 32-bit and 64-bit operation. I64 = CPUMode('I64', ISA) I32 = CPUMode('I32', ISA) + +# The set of floating point condition codes that are directly supported. +# Other condition codes need to be reversed or expressed as two tests. +supported_floatccs = [ + floatcc.ord, + floatcc.uno, + floatcc.one, + floatcc.ueq, + floatcc.gt, + floatcc.ge, + floatcc.ult, + floatcc.ule] diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 6d46111d7f..c7f9a7c0b6 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -26,8 +26,8 @@ I32.legalize_type( default=narrow, b1=expand, i32=intel_expand, - f32=expand, - f64=expand) + f32=intel_expand, + f64=intel_expand) I64.legalize_monomorphic(expand) I64.legalize_type( @@ -35,8 +35,8 @@ I64.legalize_type( b1=expand, i32=intel_expand, i64=intel_expand, - f32=expand, - f64=expand) + f32=intel_expand, + f64=intel_expand) # @@ -106,6 +106,13 @@ for inst, opc in [ (base.bxor, 0x31)]: enc_i32_i64(inst, r.rr, opc) +# Also add `b1` encodings for the logic instructions. +# TODO: Should this be done with 8-bit instructions? It would improve +# partial register dependencies. +enc_flt(base.band.b1, r.rr, 0x21) +enc_flt(base.bor.b1, r.rr, 0x09) +enc_flt(base.bxor.b1, r.rr, 0x31) + enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf) enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7) enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6) @@ -391,3 +398,10 @@ for inst, opc in [ (base.bxor, 0x57)]: enc_flt(inst.f32, r.frm, 0x0f, opc) enc_flt(inst.f64, r.frm, 0x0f, opc) + +# Comparisons. +# +# This only covers the condition codes in `supported_floatccs`, the rest are +# handled by legalization patterns. 
+enc_flt(base.fcmp.f32, r.fcscc, 0x0f, 0x2e) +enc_flt(base.fcmp.f64, r.fcscc, 0x66, 0x0f, 0x2e) diff --git a/lib/cretonne/meta/isa/intel/legalize.py b/lib/cretonne/meta/isa/intel/legalize.py index cc46846d81..6125dcd0e4 100644 --- a/lib/cretonne/meta/isa/intel/legalize.py +++ b/lib/cretonne/meta/isa/intel/legalize.py @@ -4,7 +4,7 @@ Custom legalization patterns for Intel. from __future__ import absolute_import from cdsl.ast import Var from cdsl.xform import Rtl, XFormGroup -from base.immediates import imm64 +from base.immediates import imm64, floatcc from base.types import i32, i64 from base import legalize as shared from base import instructions as insts @@ -25,6 +25,8 @@ dead = Var('dead') x = Var('x') xhi = Var('xhi') y = Var('y') +a1 = Var('a1') +a2 = Var('a2') # # Division and remainder. @@ -56,3 +58,37 @@ for ty in [i32, i64]: xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)), (dead, a) << x86.sdivmodx(x, xhi, y) )) + +# Floating point condition codes. +# +# The 8 condition codes in `supported_floatccs` are directly supported by a +# `ucomiss` or `ucomisd` instruction. The remaining codes need legalization +# patterns. + +# Equality needs an explicit `ord` test which checks the parity bit. +intel_expand.legalize( + a << insts.fcmp(floatcc.eq, x, y), + Rtl( + a1 << insts.fcmp(floatcc.ord, x, y), + a2 << insts.fcmp(floatcc.ueq, x, y), + a << insts.band(a1, a2) + )) +intel_expand.legalize( + a << insts.fcmp(floatcc.ne, x, y), + Rtl( + a1 << insts.fcmp(floatcc.uno, x, y), + a2 << insts.fcmp(floatcc.one, x, y), + a << insts.bor(a1, a2) + )) + +# Inequalities that need to be reversed. 
+for cc, rev_cc in [ + (floatcc.lt, floatcc.gt), + (floatcc.le, floatcc.ge), + (floatcc.ugt, floatcc.ult), + (floatcc.uge, floatcc.ule)]: + intel_expand.legalize( + a << insts.fcmp(cc, x, y), + Rtl( + a << insts.fcmp(rev_cc, y, x) + )) diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index 04c7c91891..74391b0746 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -3,12 +3,13 @@ Intel Encoding recipes. """ from __future__ import absolute_import from cdsl.isa import EncRecipe -from cdsl.predicates import IsSignedInt, IsEqual +from cdsl.predicates import IsSignedInt, IsEqual, Or from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry from base.formats import Trap, Call, IndirectCall, Store, Load -from base.formats import IntCompare +from base.formats import IntCompare, FloatCompare from base.formats import RegMove, Ternary, Jump, Branch, FuncAddr from .registers import GPR, ABCD, FPR, GPR8, FPR8, StackGPR32, StackFPR32 +from .defs import supported_floatccs try: from typing import Tuple, Dict, Sequence # noqa @@ -696,7 +697,7 @@ t8jccb_abcd = TailRecipe( # This bandaid macro doesn't support a REX prefix for the final `setCC` # instruction, so it is limited to the `ABCD` register class for booleans. icscc = TailRecipe( - 'cscc', IntCompare, size=1 + 3, ins=(GPR, GPR), outs=ABCD, + 'icscc', IntCompare, size=1 + 3, ins=(GPR, GPR), outs=ABCD, emit=''' // Comparison instruction. PUT_OP(bits, rex2(in_reg0, in_reg1), sink); @@ -719,3 +720,49 @@ icscc = TailRecipe( sink.put1(setcc); modrm_rr(out_reg0, 0, sink); ''') + + +# Make a FloatCompare instruction predicate with the supported condition codes. + +# Same thing for floating point. +# +# The ucomiss/ucomisd instructions set the EFLAGS bits ZF/PF/CF like this: +# +# ZPC OSA +# UN 111 000 +# GT 000 000 +# LT 001 000 +# EQ 100 000 +# +# Not all floating point condition codes are supported. 
+fcscc = TailRecipe( + 'fcscc', FloatCompare, size=1 + 3, ins=(FPR, FPR), outs=ABCD, + instp=Or(*(IsEqual(FloatCompare.cond, cc) + for cc in supported_floatccs)), + emit=''' + // Comparison instruction. + PUT_OP(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + // `setCC` instruction, no REX. + use ir::condcodes::FloatCC::*; + let setcc = match cond { + Ordered => 0x9b, // EQ|LT|GT => setnp (P=0) + Unordered => 0x9a, // UN => setp (P=1) + OrderedNotEqual => 0x95, // LT|GT => setne (Z=0), + UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1) + GreaterThan => 0x97, // GT => seta (C=0&Z=0) + GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0) + UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1) + UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1) + Equal | // EQ + NotEqual | // UN|LT|GT + LessThan | // LT + LessThanOrEqual | // LT|EQ + UnorderedOrGreaterThan | // UN|GT + UnorderedOrGreaterThanOrEqual // UN|GT|EQ + => panic!("{} not supported by fcscc", cond), + }; + sink.put1(0x0f); + sink.put1(setcc); + modrm_rr(out_reg0, 0, sink); + ''')