Add an Intel-specific x86_cvtt2si instruction.

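This is used to represent the non-trapping semantics of the cvttss2si and cvttsd2si instructions (and their vectorized counterparts). The overflow behavior of this instruction is specific to the Intel ISAs: a result that can't be represented yields the smallest signed value of the output type instead of trapping. There is no float-to-i64 instruction on the 32-bit Intel ISA, so the i64 result type is only encoded for the 64-bit ISA.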
2017-09-26 15:35:32 -07:00
parent d13f29cfe4
commit ac69f3bfdf
5 changed files with 77 additions and 0 deletions
--- a/cranelift/filetests/isa/intel/binary32-float.cton
+++ b/cranelift/filetests/isa/intel/binary32-float.cton
@@ -87,6 +87,13 @@ ebb0:
    ; asm: xorps %xmm5, %xmm2
    [-,%xmm2]           v37 = bxor v11, v10                     ; bin: 0f 57 d5

+    ; Convert float to int. (No i64 dest on i386).
+
+    ; asm: cvttss2si %xmm5, %ecx
+    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f3 0f 2c cd
+    ; asm: cvttss2si %xmm2, %esi
+    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f3 0f 2c f2
+
    ; Unary arithmetic.

    ; asm: sqrtss %xmm5, %xmm2
@@ -267,6 +274,13 @@ ebb0:
    ; asm: xorps %xmm5, %xmm2
    [-,%xmm2]           v37 = bxor v11, v10                     ; bin: 0f 57 d5

+    ; Convert float to int. (No i64 dest on i386).
+
+    ; asm: cvttsd2si %xmm5, %ecx
+    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f2 0f 2c cd
+    ; asm: cvttsd2si %xmm2, %esi
+    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f2 0f 2c f2
+
    ; Unary arithmetic.

    ; asm: sqrtsd %xmm5, %xmm2
--- a/cranelift/filetests/isa/intel/binary64-float.cton
+++ b/cranelift/filetests/isa/intel/binary64-float.cton
@@ -96,6 +96,18 @@ ebb0:
    ; asm: movaps %xmm5, %xmm10
    [-,%xmm10]          v39 = copy v10                          ; bin: 44 0f 28 d5

+    ; Convert float to int.
+
+    ; asm: cvttss2si %xmm5, %ecx
+    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f3 0f 2c cd
+    ; asm: cvttss2si %xmm10, %esi
+    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f3 41 0f 2c f2
+
+    ; asm: cvttss2si %xmm5, %rcx
+    [-,%rcx]            v42 = x86_cvtt2si.i64 v10               ; bin: f3 48 0f 2c cd
+    ; asm: cvttss2si %xmm10, %rsi
+    [-,%rsi]            v43 = x86_cvtt2si.i64 v11               ; bin: f3 49 0f 2c f2
+
    ; Unary arithmetic.

    ; asm: sqrtss %xmm5, %xmm10
@@ -291,6 +303,18 @@ ebb0:
    ; asm: movaps %xmm5, %xmm10
    [-,%xmm10]          v39 = copy v10                          ; bin: 44 0f 28 d5

+    ; Convert float to int.
+
+    ; asm: cvttsd2si %xmm5, %ecx
+    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f2 0f 2c cd
+    ; asm: cvttsd2si %xmm10, %esi
+    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f2 41 0f 2c f2
+
+    ; asm: cvttsd2si %xmm5, %rcx
+    [-,%rcx]            v42 = x86_cvtt2si.i64 v10               ; bin: f2 48 0f 2c cd
+    ; asm: cvttsd2si %xmm10, %rsi
+    [-,%rsi]            v43 = x86_cvtt2si.i64 v11               ; bin: f2 49 0f 2c f2
+
    ; Unary arithmetic.

    ; asm: sqrtsd %xmm5, %xmm10
--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -371,6 +371,14 @@ enc_flt(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a)
 # cvtsd2ss
 enc_flt(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)

+# cvttss2si
+enc_flt(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
+I64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
+
+# cvttsd2si
+enc_flt(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
+I64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
+
 # Exact square roots.
 enc_flt(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
 enc_flt(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
--- a/lib/cretonne/meta/isa/intel/instructions.py
+++ b/lib/cretonne/meta/isa/intel/instructions.py
@@ -46,4 +46,27 @@ sdivmodx = Instruction(
        """,
        ins=(nlo, nhi, d), outs=(q, r), can_trap=True)

+
+Float = TypeVar(
+        'Float', 'A scalar or vector floating point number',
+        floats=True, simd=True)
+IntTo = TypeVar(
+        'IntTo', 'An integer type with the same number of lanes',
+        ints=(32, 64), simd=True)
+
+x = Operand('x', Float)
+a = Operand('a', IntTo)
+
+cvtt2si = Instruction(
+        'x86_cvtt2si', r"""
+        Convert with truncation floating point to signed integer.
+
+        The source floating point operand is converted to a signed integer by
+        rounding towards zero. If the input is NaN or the result doesn't fit in
+        the output type, returns the smallest signed value of the output type.
+
+        This instruction does not trap.
+        """,
+        ins=x, outs=a)
+
 GROUP.close()
--- a/lib/cretonne/meta/isa/intel/recipes.py
+++ b/lib/cretonne/meta/isa/intel/recipes.py
@@ -290,6 +290,14 @@ frurm = TailRecipe(
        modrm_rr(in_reg0, out_reg0, sink);
        ''')

+# XX /r, RM form, FPR -> GPR.
+rfurm = TailRecipe(
+        'rfurm', Unary, size=1, ins=FPR, outs=GPR,
+        emit='''
+        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
+        modrm_rr(in_reg0, out_reg0, sink);
+        ''')
+
 # XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
 furmi_rnd = TailRecipe(
        'furmi_rnd', Unary, size=2, ins=FPR, outs=FPR,