Add Intel encodings for the fcmp instruction.
Not all floating point condition codes are directly supported by the ucomiss/ucomisd instructions. Some inequalities need to be reversed, and eq+ne each require two separate tests.
This commit is contained in:
@@ -157,6 +157,36 @@ ebb0:
|
||||
; asm: movd 1032(%esp), %xmm2
|
||||
[-,%xmm2] v211 = fill v201 ; bin: 66 0f 6e 94 24 00000408
|
||||
|
||||
; Comparisons.
|
||||
;
|
||||
; Only `supported_floatccs` are tested here. Others are handled by
|
||||
; legalization patterns.
|
||||
|
||||
; asm: ucomiss %xmm2, %xmm5
|
||||
; asm: setnp %bl
|
||||
[-,%rbx] v300 = fcmp ord v10, v11 ; bin: 0f 2e ea 0f 9b c3
|
||||
; asm: ucomiss %xmm5, %xmm2
|
||||
; asm: setp %bl
|
||||
[-,%rbx] v301 = fcmp uno v11, v10 ; bin: 0f 2e d5 0f 9a c3
|
||||
; asm: ucomiss %xmm2, %xmm5
|
||||
; asm: setne %dl
|
||||
[-,%rdx] v302 = fcmp one v10, v11 ; bin: 0f 2e ea 0f 95 c2
|
||||
; asm: ucomiss %xmm5, %xmm2
|
||||
; asm: sete %dl
|
||||
[-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 0f 2e d5 0f 94 c2
|
||||
; asm: ucomiss %xmm2, %xmm5
|
||||
; asm: seta %bl
|
||||
[-,%rbx] v304 = fcmp gt v10, v11 ; bin: 0f 2e ea 0f 97 c3
|
||||
; asm: ucomiss %xmm5, %xmm2
|
||||
; asm: setae %bl
|
||||
[-,%rbx] v305 = fcmp ge v11, v10 ; bin: 0f 2e d5 0f 93 c3
|
||||
; asm: ucomiss %xmm2, %xmm5
|
||||
; asm: setb %dl
|
||||
[-,%rdx] v306 = fcmp ult v10, v11 ; bin: 0f 2e ea 0f 92 c2
|
||||
; asm: ucomiss %xmm5, %xmm2
|
||||
; asm: setbe %dl
|
||||
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 0f 2e d5 0f 96 c2
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -302,5 +332,35 @@ ebb0:
|
||||
; asm: movq 1032(%esp), %xmm2
|
||||
[-,%xmm2] v211 = fill v201 ; bin: f3 0f 7e 94 24 00000408
|
||||
|
||||
; Comparisons.
|
||||
;
|
||||
; Only `supported_floatccs` are tested here. Others are handled by
|
||||
; legalization patterns.
|
||||
|
||||
; asm: ucomisd %xmm2, %xmm5
|
||||
; asm: setnp %bl
|
||||
[-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 0f 2e ea 0f 9b c3
|
||||
; asm: ucomisd %xmm5, %xmm2
|
||||
; asm: setp %bl
|
||||
[-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 0f 2e d5 0f 9a c3
|
||||
; asm: ucomisd %xmm2, %xmm5
|
||||
; asm: setne %dl
|
||||
[-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 0f 2e ea 0f 95 c2
|
||||
; asm: ucomisd %xmm5, %xmm2
|
||||
; asm: sete %dl
|
||||
[-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 0f 2e d5 0f 94 c2
|
||||
; asm: ucomisd %xmm2, %xmm5
|
||||
; asm: seta %bl
|
||||
[-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 0f 2e ea 0f 97 c3
|
||||
; asm: ucomisd %xmm5, %xmm2
|
||||
; asm: setae %bl
|
||||
[-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 0f 2e d5 0f 93 c3
|
||||
; asm: ucomisd %xmm2, %xmm5
|
||||
; asm: setb %dl
|
||||
[-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 0f 2e ea 0f 92 c2
|
||||
; asm: ucomisd %xmm5, %xmm2
|
||||
; asm: setbe %dl
|
||||
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 0f 2e d5 0f 96 c2
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -166,6 +166,36 @@ ebb0:
|
||||
; asm: movd 1032(%rsp), %xmm10
|
||||
[-,%xmm10] v211 = fill v201 ; bin: 66 44 0f 6e 94 24 00000408
|
||||
|
||||
; Comparisons.
|
||||
;
|
||||
; Only `supported_floatccs` are tested here. Others are handled by
|
||||
; legalization patterns.
|
||||
|
||||
; asm: ucomiss %xmm10, %xmm5
|
||||
; asm: setnp %bl
|
||||
[-,%rbx] v300 = fcmp ord v10, v11 ; bin: 41 0f 2e ea 0f 9b c3
|
||||
; asm: ucomiss %xmm5, %xmm10
|
||||
; asm: setp %bl
|
||||
[-,%rbx] v301 = fcmp uno v11, v10 ; bin: 44 0f 2e d5 0f 9a c3
|
||||
; asm: ucomiss %xmm10, %xmm5
|
||||
; asm: setne %dl
|
||||
[-,%rdx] v302 = fcmp one v10, v11 ; bin: 41 0f 2e ea 0f 95 c2
|
||||
; asm: ucomiss %xmm5, %xmm10
|
||||
; asm: sete %dl
|
||||
[-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 44 0f 2e d5 0f 94 c2
|
||||
; asm: ucomiss %xmm10, %xmm5
|
||||
; asm: seta %bl
|
||||
[-,%rbx] v304 = fcmp gt v10, v11 ; bin: 41 0f 2e ea 0f 97 c3
|
||||
; asm: ucomiss %xmm5, %xmm10
|
||||
; asm: setae %bl
|
||||
[-,%rbx] v305 = fcmp ge v11, v10 ; bin: 44 0f 2e d5 0f 93 c3
|
||||
; asm: ucomiss %xmm10, %xmm5
|
||||
; asm: setb %dl
|
||||
[-,%rdx] v306 = fcmp ult v10, v11 ; bin: 41 0f 2e ea 0f 92 c2
|
||||
; asm: ucomiss %xmm5, %xmm10
|
||||
; asm: setbe %dl
|
||||
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -326,5 +356,35 @@ ebb0:
|
||||
; asm: movq 1032(%rsp), %xmm10
|
||||
[-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 7e 94 24 00000408
|
||||
|
||||
; Comparisons.
|
||||
;
|
||||
; Only `supported_floatccs` are tested here. Others are handled by
|
||||
; legalization patterns.
|
||||
|
||||
; asm: ucomisd %xmm10, %xmm5
|
||||
; asm: setnp %bl
|
||||
[-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 41 0f 2e ea 0f 9b c3
|
||||
; asm: ucomisd %xmm5, %xmm10
|
||||
; asm: setp %bl
|
||||
[-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 44 0f 2e d5 0f 9a c3
|
||||
; asm: ucomisd %xmm10, %xmm5
|
||||
; asm: setne %dl
|
||||
[-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 41 0f 2e ea 0f 95 c2
|
||||
; asm: ucomisd %xmm5, %xmm10
|
||||
; asm: sete %dl
|
||||
[-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 44 0f 2e d5 0f 94 c2
|
||||
; asm: ucomisd %xmm10, %xmm5
|
||||
; asm: seta %bl
|
||||
[-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 41 0f 2e ea 0f 97 c3
|
||||
; asm: ucomisd %xmm5, %xmm10
|
||||
; asm: setae %bl
|
||||
[-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 44 0f 2e d5 0f 93 c3
|
||||
; asm: ucomisd %xmm10, %xmm5
|
||||
; asm: setb %dl
|
||||
[-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 41 0f 2e ea 0f 92 c2
|
||||
; asm: ucomisd %xmm5, %xmm10
|
||||
; asm: setbe %dl
|
||||
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
50
cranelift/filetests/wasm/f32-compares.cton
Normal file
50
cranelift/filetests/wasm/f32-compares.cton
Normal file
@@ -0,0 +1,50 @@
|
||||
; Test code generation for WebAssembly f32 comparison operators.
|
||||
test compile
|
||||
|
||||
set is_64bit=0
|
||||
isa intel haswell
|
||||
|
||||
set is_64bit=1
|
||||
isa intel haswell
|
||||
|
||||
function %f32_eq(f32, f32) -> i32 {
|
||||
ebb0(v0: f32, v1: f32):
|
||||
v2 = fcmp eq v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f32_ne(f32, f32) -> i32 {
|
||||
ebb0(v0: f32, v1: f32):
|
||||
v2 = fcmp ne v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f32_lt(f32, f32) -> i32 {
|
||||
ebb0(v0: f32, v1: f32):
|
||||
v2 = fcmp lt v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f32_gt(f32, f32) -> i32 {
|
||||
ebb0(v0: f32, v1: f32):
|
||||
v2 = fcmp gt v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f32_le(f32, f32) -> i32 {
|
||||
ebb0(v0: f32, v1: f32):
|
||||
v2 = fcmp le v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f32_ge(f32, f32) -> i32 {
|
||||
ebb0(v0: f32, v1: f32):
|
||||
v2 = fcmp ge v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
50
cranelift/filetests/wasm/f64-compares.cton
Normal file
50
cranelift/filetests/wasm/f64-compares.cton
Normal file
@@ -0,0 +1,50 @@
|
||||
; Test code generation for WebAssembly f64 comparison operators.
|
||||
test compile
|
||||
|
||||
set is_64bit=0
|
||||
isa intel haswell
|
||||
|
||||
set is_64bit=1
|
||||
isa intel haswell
|
||||
|
||||
function %f64_eq(f64, f64) -> i32 {
|
||||
ebb0(v0: f64, v1: f64):
|
||||
v2 = fcmp eq v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f64_ne(f64, f64) -> i32 {
|
||||
ebb0(v0: f64, v1: f64):
|
||||
v2 = fcmp ne v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f64_lt(f64, f64) -> i32 {
|
||||
ebb0(v0: f64, v1: f64):
|
||||
v2 = fcmp lt v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f64_gt(f64, f64) -> i32 {
|
||||
ebb0(v0: f64, v1: f64):
|
||||
v2 = fcmp gt v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f64_le(f64, f64) -> i32 {
|
||||
ebb0(v0: f64, v1: f64):
|
||||
v2 = fcmp le v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
|
||||
function %f64_ge(f64, f64) -> i32 {
|
||||
ebb0(v0: f64, v1: f64):
|
||||
v2 = fcmp ge v0, v1
|
||||
v3 = bint.i32 v2
|
||||
return v3
|
||||
}
|
||||
@@ -169,10 +169,12 @@ def unwrap_inst(iref, node, fmt):
|
||||
iform = expr.inst.format
|
||||
nvops = iform.num_value_operands
|
||||
|
||||
# The tuple of locals we're extracting is `expr.args`.
|
||||
# The tuple of locals to extract is the `Var` instances in `expr.args`.
|
||||
arg_names = tuple(
|
||||
arg.name if isinstance(arg, Var) else '_' for arg in expr.args)
|
||||
with fmt.indented(
|
||||
'let ({}, predicate) = if let ir::InstructionData::{} {{'
|
||||
.format(', '.join(map(str, expr.args)), iform.name), '};'):
|
||||
.format(', '.join(map(str, arg_names)), iform.name), '};'):
|
||||
# Fields are encoded directly.
|
||||
for f in iform.imm_fields:
|
||||
fmt.line('{},'.format(f.member))
|
||||
|
||||
@@ -7,9 +7,22 @@ from __future__ import absolute_import
|
||||
from cdsl.isa import TargetISA, CPUMode
|
||||
import base.instructions
|
||||
from . import instructions as x86
|
||||
from base.immediates import floatcc
|
||||
|
||||
ISA = TargetISA('intel', [base.instructions.GROUP, x86.GROUP])
|
||||
|
||||
# CPU modes for 32-bit and 64-bit operation.
|
||||
I64 = CPUMode('I64', ISA)
|
||||
I32 = CPUMode('I32', ISA)
|
||||
|
||||
# The set of floating point condition codes that are directly supported.
|
||||
# Other condition codes need to be reversed or expressed as two tests.
|
||||
supported_floatccs = [
|
||||
floatcc.ord,
|
||||
floatcc.uno,
|
||||
floatcc.one,
|
||||
floatcc.ueq,
|
||||
floatcc.gt,
|
||||
floatcc.ge,
|
||||
floatcc.ult,
|
||||
floatcc.ule]
|
||||
|
||||
@@ -26,8 +26,8 @@ I32.legalize_type(
|
||||
default=narrow,
|
||||
b1=expand,
|
||||
i32=intel_expand,
|
||||
f32=expand,
|
||||
f64=expand)
|
||||
f32=intel_expand,
|
||||
f64=intel_expand)
|
||||
|
||||
I64.legalize_monomorphic(expand)
|
||||
I64.legalize_type(
|
||||
@@ -35,8 +35,8 @@ I64.legalize_type(
|
||||
b1=expand,
|
||||
i32=intel_expand,
|
||||
i64=intel_expand,
|
||||
f32=expand,
|
||||
f64=expand)
|
||||
f32=intel_expand,
|
||||
f64=intel_expand)
|
||||
|
||||
|
||||
#
|
||||
@@ -106,6 +106,13 @@ for inst, opc in [
|
||||
(base.bxor, 0x31)]:
|
||||
enc_i32_i64(inst, r.rr, opc)
|
||||
|
||||
# Also add `b1` encodings for the logic instructions.
|
||||
# TODO: Should this be done with 8-bit instructions? It would improve
|
||||
# partial register dependencies.
|
||||
enc_flt(base.band.b1, r.rr, 0x21)
|
||||
enc_flt(base.bor.b1, r.rr, 0x09)
|
||||
enc_flt(base.bxor.b1, r.rr, 0x31)
|
||||
|
||||
enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf)
|
||||
enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7)
|
||||
enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)
|
||||
@@ -391,3 +398,10 @@ for inst, opc in [
|
||||
(base.bxor, 0x57)]:
|
||||
enc_flt(inst.f32, r.frm, 0x0f, opc)
|
||||
enc_flt(inst.f64, r.frm, 0x0f, opc)
|
||||
|
||||
# Comparisons.
|
||||
#
|
||||
# This only covers the condition codes in `supported_floatccs`, the rest are
|
||||
# handled by legalization patterns.
|
||||
enc_flt(base.fcmp.f32, r.fcscc, 0x0f, 0x2e)
|
||||
enc_flt(base.fcmp.f64, r.fcscc, 0x66, 0x0f, 0x2e)
|
||||
|
||||
@@ -4,7 +4,7 @@ Custom legalization patterns for Intel.
|
||||
from __future__ import absolute_import
|
||||
from cdsl.ast import Var
|
||||
from cdsl.xform import Rtl, XFormGroup
|
||||
from base.immediates import imm64
|
||||
from base.immediates import imm64, floatcc
|
||||
from base.types import i32, i64
|
||||
from base import legalize as shared
|
||||
from base import instructions as insts
|
||||
@@ -25,6 +25,8 @@ dead = Var('dead')
|
||||
x = Var('x')
|
||||
xhi = Var('xhi')
|
||||
y = Var('y')
|
||||
a1 = Var('a1')
|
||||
a2 = Var('a2')
|
||||
|
||||
#
|
||||
# Division and remainder.
|
||||
@@ -56,3 +58,37 @@ for ty in [i32, i64]:
|
||||
xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)),
|
||||
(dead, a) << x86.sdivmodx(x, xhi, y)
|
||||
))
|
||||
|
||||
# Floating point condition codes.
|
||||
#
|
||||
# The 8 condition codes in `supported_floatccs` are directly supported by a
|
||||
# `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
|
||||
# patterns.
|
||||
|
||||
# Equality needs an explicit `ord` test which checks the parity bit.
|
||||
intel_expand.legalize(
|
||||
a << insts.fcmp(floatcc.eq, x, y),
|
||||
Rtl(
|
||||
a1 << insts.fcmp(floatcc.ord, x, y),
|
||||
a2 << insts.fcmp(floatcc.ueq, x, y),
|
||||
a << insts.band(a1, a2)
|
||||
))
|
||||
intel_expand.legalize(
|
||||
a << insts.fcmp(floatcc.ne, x, y),
|
||||
Rtl(
|
||||
a1 << insts.fcmp(floatcc.uno, x, y),
|
||||
a2 << insts.fcmp(floatcc.one, x, y),
|
||||
a << insts.bor(a1, a2)
|
||||
))
|
||||
|
||||
# Inequalities that need to be reversed.
|
||||
for cc, rev_cc in [
|
||||
(floatcc.lt, floatcc.gt),
|
||||
(floatcc.le, floatcc.ge),
|
||||
(floatcc.ugt, floatcc.ult),
|
||||
(floatcc.uge, floatcc.ule)]:
|
||||
intel_expand.legalize(
|
||||
a << insts.fcmp(cc, x, y),
|
||||
Rtl(
|
||||
a << insts.fcmp(rev_cc, y, x)
|
||||
))
|
||||
|
||||
@@ -3,12 +3,13 @@ Intel Encoding recipes.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.isa import EncRecipe
|
||||
from cdsl.predicates import IsSignedInt, IsEqual
|
||||
from cdsl.predicates import IsSignedInt, IsEqual, Or
|
||||
from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry
|
||||
from base.formats import Trap, Call, IndirectCall, Store, Load
|
||||
from base.formats import IntCompare
|
||||
from base.formats import IntCompare, FloatCompare
|
||||
from base.formats import RegMove, Ternary, Jump, Branch, FuncAddr
|
||||
from .registers import GPR, ABCD, FPR, GPR8, FPR8, StackGPR32, StackFPR32
|
||||
from .defs import supported_floatccs
|
||||
|
||||
try:
|
||||
from typing import Tuple, Dict, Sequence # noqa
|
||||
@@ -696,7 +697,7 @@ t8jccb_abcd = TailRecipe(
|
||||
# This bandaid macro doesn't support a REX prefix for the final `setCC`
|
||||
# instruction, so it is limited to the `ABCD` register class for booleans.
|
||||
icscc = TailRecipe(
|
||||
'cscc', IntCompare, size=1 + 3, ins=(GPR, GPR), outs=ABCD,
|
||||
'icscc', IntCompare, size=1 + 3, ins=(GPR, GPR), outs=ABCD,
|
||||
emit='''
|
||||
// Comparison instruction.
|
||||
PUT_OP(bits, rex2(in_reg0, in_reg1), sink);
|
||||
@@ -719,3 +720,49 @@ icscc = TailRecipe(
|
||||
sink.put1(setcc);
|
||||
modrm_rr(out_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
|
||||
# Make a FloatCompare instruction predicate with the supported condition codes.
|
||||
|
||||
# Same thing for floating point.
|
||||
#
|
||||
# The ucomiss/ucomisd instructions set the EFLAGS bits ZF/PF/CF like this:
|
||||
#
|
||||
# ZPC OSA
|
||||
# UN 111 000
|
||||
# GT 000 000
|
||||
# LT 001 000
|
||||
# EQ 100 000
|
||||
#
|
||||
# Not all floating point condition codes are supported.
|
||||
fcscc = TailRecipe(
|
||||
'fcscc', FloatCompare, size=1 + 3, ins=(FPR, FPR), outs=ABCD,
|
||||
instp=Or(*(IsEqual(FloatCompare.cond, cc)
|
||||
for cc in supported_floatccs)),
|
||||
emit='''
|
||||
// Comparison instruction.
|
||||
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rr(in_reg1, in_reg0, sink);
|
||||
// `setCC` instruction, no REX.
|
||||
use ir::condcodes::FloatCC::*;
|
||||
let setcc = match cond {
|
||||
Ordered => 0x9b, // EQ|LT|GT => setnp (P=0)
|
||||
Unordered => 0x9a, // UN => setp (P=1)
|
||||
OrderedNotEqual => 0x95, // LT|GT => setne (Z=0),
|
||||
UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1)
|
||||
GreaterThan => 0x97, // GT => seta (C=0&Z=0)
|
||||
GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0)
|
||||
UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1)
|
||||
UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1)
|
||||
Equal | // EQ
|
||||
NotEqual | // UN|LT|GT
|
||||
LessThan | // LT
|
||||
LessThanOrEqual | // LT|EQ
|
||||
UnorderedOrGreaterThan | // UN|GT
|
||||
UnorderedOrGreaterThanOrEqual // UN|GT|EQ
|
||||
=> panic!("{} not supported by fcscc", cond),
|
||||
};
|
||||
sink.put1(0x0f);
|
||||
sink.put1(setcc);
|
||||
modrm_rr(out_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
Reference in New Issue
Block a user