From ac69f3bfdff12a686b860cffda965f98a3ede1df Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen <jolesen@mozilla.com>
Date: Tue, 26 Sep 2017 15:35:32 -0700
Subject: [PATCH] Add an Intel-specific x86_cvtt2si instruction.

This is used to represent the non-trapping semantics of the cvttss2si and
cvttsd2si instructions (and their vectorized counterparts).

The overflow behavior of this instruction is specific to the Intel ISAs.

There is no float-to-i64 instruction on the 32-bit Intel ISA, so the i64
result encodings are only added for the 64-bit ISA.
---
 .../filetests/isa/intel/binary32-float.cton   | 14 +++++++++++
 .../filetests/isa/intel/binary64-float.cton   | 24 +++++++++++++++++++
 lib/cretonne/meta/isa/intel/encodings.py      |  8 +++++++
 lib/cretonne/meta/isa/intel/instructions.py   | 23 ++++++++++++++++++
 lib/cretonne/meta/isa/intel/recipes.py        |  8 +++++++
 5 files changed, 77 insertions(+)

diff --git a/cranelift/filetests/isa/intel/binary32-float.cton b/cranelift/filetests/isa/intel/binary32-float.cton
index 0d823e2dae..9358fc338e 100644
--- a/cranelift/filetests/isa/intel/binary32-float.cton
+++ b/cranelift/filetests/isa/intel/binary32-float.cton
@@ -87,6 +87,13 @@ ebb0:
     ; asm: xorps %xmm5, %xmm2
     [-,%xmm2]           v37 = bxor v11, v10                     ; bin: 0f 57 d5
 
+    ; Convert float to int. (No i64 dest on i386).
+
+    ; asm: cvttss2si %xmm5, %ecx
+    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f3 0f 2c cd
+    ; asm: cvttss2si %xmm2, %esi
+    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f3 0f 2c f2
+
     ; Unary arithmetic.
 
     ; asm: sqrtss %xmm5, %xmm2
@@ -267,6 +274,13 @@ ebb0:
     ; asm: xorps %xmm5, %xmm2
     [-,%xmm2]           v37 = bxor v11, v10                     ; bin: 0f 57 d5
 
+    ; Convert float to int. (No i64 dest on i386).
+
+    ; asm: cvttsd2si %xmm5, %ecx
+    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f2 0f 2c cd
+    ; asm: cvttsd2si %xmm2, %esi
+    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f2 0f 2c f2
+
     ; Unary arithmetic.
 
     ; asm: sqrtsd %xmm5, %xmm2
diff --git a/cranelift/filetests/isa/intel/binary64-float.cton b/cranelift/filetests/isa/intel/binary64-float.cton
index 12a26c937e..5763a5ab67 100644
--- a/cranelift/filetests/isa/intel/binary64-float.cton
+++ b/cranelift/filetests/isa/intel/binary64-float.cton
@@ -96,6 +96,18 @@ ebb0:
     ; asm: movaps %xmm5, %xmm10
     [-,%xmm10]          v39 = copy v10                          ; bin: 44 0f 28 d5
 
+    ; Convert float to int.
+
+    ; asm: cvttss2si %xmm5, %ecx
+    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f3 0f 2c cd
+    ; asm: cvttss2si %xmm10, %esi
+    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f3 41 0f 2c f2
+
+    ; asm: cvttss2si %xmm5, %rcx
+    [-,%rcx]            v42 = x86_cvtt2si.i64 v10               ; bin: f3 48 0f 2c cd
+    ; asm: cvttss2si %xmm10, %rsi
+    [-,%rsi]            v43 = x86_cvtt2si.i64 v11               ; bin: f3 49 0f 2c f2
+
     ; Unary arithmetic.
 
     ; asm: sqrtss %xmm5, %xmm10
@@ -291,6 +303,18 @@ ebb0:
     ; asm: movaps %xmm5, %xmm10
     [-,%xmm10]          v39 = copy v10                          ; bin: 44 0f 28 d5
 
+    ; Convert float to int.
+
+    ; asm: cvttsd2si %xmm5, %ecx
+    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f2 0f 2c cd
+    ; asm: cvttsd2si %xmm10, %esi
+    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f2 41 0f 2c f2
+
+    ; asm: cvttsd2si %xmm5, %rcx
+    [-,%rcx]            v42 = x86_cvtt2si.i64 v10               ; bin: f2 48 0f 2c cd
+    ; asm: cvttsd2si %xmm10, %rsi
+    [-,%rsi]            v43 = x86_cvtt2si.i64 v11               ; bin: f2 49 0f 2c f2
+
     ; Unary arithmetic.
 
     ; asm: sqrtsd %xmm5, %xmm10
diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py
index 5f295eddf2..9eb832572d 100644
--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -371,6 +371,14 @@ enc_flt(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a)
 # cvtsd2ss
 enc_flt(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
 
+# cvttss2si: f32 -> i32, plus an I64-only REX.W (w=1) form for f32 -> i64.
+enc_flt(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
+I64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
+
+# cvttsd2si: f64 -> i32, plus an I64-only REX.W (w=1) form for f64 -> i64.
+enc_flt(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
+I64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
+
 # Exact square roots.
 enc_flt(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
 enc_flt(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
diff --git a/lib/cretonne/meta/isa/intel/instructions.py b/lib/cretonne/meta/isa/intel/instructions.py
index 7921b2f431..c9739ccecb 100644
--- a/lib/cretonne/meta/isa/intel/instructions.py
+++ b/lib/cretonne/meta/isa/intel/instructions.py
@@ -46,4 +46,27 @@ sdivmodx = Instruction(
         """,
         ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
 
+
+Float = TypeVar(
+        'Float', 'A scalar or vector floating point number',
+        floats=True, simd=True)
+IntTo = TypeVar(
+        'IntTo', 'An integer type with the same number of lanes',
+        ints=(32, 64), simd=True)
+
+x = Operand('x', Float)
+a = Operand('a', IntTo)
+
+cvtt2si = Instruction(
+        'x86_cvtt2si', r"""
+        Convert floating point to signed integer with truncation.
+
+        The source floating point operand is converted to a signed integer by
+        rounding towards zero. If the input is NaN or the result doesn't fit
+        the output type, returns the smallest signed value of that type.
+
+        This instruction does not trap.
+        """,
+        ins=x, outs=a)
+
 GROUP.close()
diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py
index 74391b0746..a5ead7e6fd 100644
--- a/lib/cretonne/meta/isa/intel/recipes.py
+++ b/lib/cretonne/meta/isa/intel/recipes.py
@@ -290,6 +290,14 @@ frurm = TailRecipe(
         modrm_rr(in_reg0, out_reg0, sink);
         ''')
 
+# XX /r, RM form, FPR -> GPR: input FPR in ModR/M r/m, output GPR in reg.
+rfurm = TailRecipe(
+        'rfurm', Unary, size=1, ins=FPR, outs=GPR,
+        emit='''
+        PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
+        modrm_rr(in_reg0, out_reg0, sink);
+        ''')
+
 # XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
 furmi_rnd = TailRecipe(
         'furmi_rnd', Unary, size=2, ins=FPR, outs=FPR,