From ac69f3bfdff12a686b860cffda965f98a3ede1df Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 26 Sep 2017 15:35:32 -0700 Subject: [PATCH] Add an Intel-specific x86_cvtt2si instruction. This is used to represent the non-trapping semantics of the cvttss2si and cvttsd2si instructions (and their vectorized counterparts). The overflow behavior of this instruction is specific to the Intel ISAs. There is no float-to-i64 instruction on the 32-bit Intel ISA. --- .../filetests/isa/intel/binary32-float.cton | 14 +++++++++++ .../filetests/isa/intel/binary64-float.cton | 24 +++++++++++++++++++ lib/cretonne/meta/isa/intel/encodings.py | 8 +++++++ lib/cretonne/meta/isa/intel/instructions.py | 23 ++++++++++++++++++ lib/cretonne/meta/isa/intel/recipes.py | 8 +++++++ 5 files changed, 77 insertions(+) diff --git a/cranelift/filetests/isa/intel/binary32-float.cton b/cranelift/filetests/isa/intel/binary32-float.cton index 0d823e2dae..9358fc338e 100644 --- a/cranelift/filetests/isa/intel/binary32-float.cton +++ b/cranelift/filetests/isa/intel/binary32-float.cton @@ -87,6 +87,13 @@ ebb0: ; asm: xorps %xmm5, %xmm2 [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 + ; Convert float to int. (No i64 dest on i386). + + ; asm: cvttss2si %xmm5, %ecx + [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd + ; asm: cvttss2si %xmm2, %esi + [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 0f 2c f2 + ; Unary arithmetic. ; asm: sqrtss %xmm5, %xmm2 @@ -267,6 +274,13 @@ ebb0: ; asm: xorps %xmm5, %xmm2 [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 + ; Convert float to int. (No i64 dest on i386). + + ; asm: cvttsd2si %xmm5, %ecx + [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd + ; asm: cvttsd2si %xmm2, %esi + [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 0f 2c f2 + ; Unary arithmetic. ; asm: sqrtsd %xmm5, %xmm2 diff --git a/cranelift/filetests/isa/intel/binary64-float.cton b/cranelift/filetests/isa/intel/binary64-float.cton index 12a26c937e..5763a5ab67 100644 --- a/cranelift/filetests/isa/intel/binary64-float.cton +++ b/cranelift/filetests/isa/intel/binary64-float.cton @@ -96,6 +96,18 @@ ebb0: ; asm: movaps %xmm5, %xmm10 [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 + ; Convert float to int. + + ; asm: cvttss2si %xmm5, %ecx + [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd + ; asm: cvttss2si %xmm10, %esi + [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2 + + ; asm: cvttss2si %xmm5, %rcx + [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd + ; asm: cvttss2si %xmm10, %rsi + [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2 + ; Unary arithmetic. ; asm: sqrtss %xmm5, %xmm10 @@ -291,6 +303,18 @@ ebb0: ; asm: movaps %xmm5, %xmm10 [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 + ; Convert float to int. + + ; asm: cvttsd2si %xmm5, %ecx + [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd + ; asm: cvttsd2si %xmm10, %esi + [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2 + + ; asm: cvttsd2si %xmm5, %rcx + [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd + ; asm: cvttsd2si %xmm10, %rsi + [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2 + ; Unary arithmetic. ; asm: sqrtsd %xmm5, %xmm10 diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 5f295eddf2..9eb832572d 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -371,6 +371,14 @@ enc_flt(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a) # cvtsd2ss enc_flt(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a) +# cvttss2si +enc_flt(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c) +I64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1)) + +# cvttsd2si +enc_flt(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c) +I64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1)) + # Exact square roots. enc_flt(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51) enc_flt(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51) diff --git a/lib/cretonne/meta/isa/intel/instructions.py b/lib/cretonne/meta/isa/intel/instructions.py index 7921b2f431..c9739ccecb 100644 --- a/lib/cretonne/meta/isa/intel/instructions.py +++ b/lib/cretonne/meta/isa/intel/instructions.py @@ -46,4 +46,27 @@ sdivmodx = Instruction( """, ins=(nlo, nhi, d), outs=(q, r), can_trap=True) + +Float = TypeVar( + 'Float', 'A scalar or vector floating point number', + floats=True, simd=True) +IntTo = TypeVar( + 'IntTo', 'An integer type with the same number of lanes', + ints=(32, 64), simd=True) + +x = Operand('x', Float) +a = Operand('a', IntTo) + +cvtt2si = Instruction( + 'x86_cvtt2si', r""" + Convert with truncation floating point to signed integer. + + The source floating point operand is converted to a signed integer by + rounding towards zero. If the result can't be represented in the output + type, returns the smallest signed value the output type can represent. + + This instruction does not trap. + """, + ins=x, outs=a) + GROUP.close() diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index 74391b0746..a5ead7e6fd 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -290,6 +290,14 @@ frurm = TailRecipe( modrm_rr(in_reg0, out_reg0, sink); ''') +# XX /r, RM form, FPR -> GPR. +rfurm = TailRecipe( + 'rfurm', Unary, size=1, ins=FPR, outs=GPR, + emit=''' + PUT_OP(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + ''') + # XX /r, RMI form for one of the roundXX SSE 4.1 instructions. furmi_rnd = TailRecipe( 'furmi_rnd', Unary, size=2, ins=FPR, outs=FPR,