Add an Intel-specific x86_cvtt2si instruction.

This is used to represent the non-trapping semantics of the cvttss2si and
cvttsd2si instructions (and their vectorized counterparts).

The overflow behavior of this instruction is specific to the Intel ISAs.

There is no float-to-i64 instruction on the 32-bit Intel ISA.
This commit is contained in:
Jakob Stoklund Olesen
2017-09-26 15:35:32 -07:00
parent d13f29cfe4
commit ac69f3bfdf
5 changed files with 77 additions and 0 deletions

View File

@@ -87,6 +87,13 @@ ebb0:
; asm: xorps %xmm5, %xmm2
[-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5
; Convert float to int. (No i64 dest on i386).
; asm: cvttss2si %xmm5, %ecx
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd
; asm: cvttss2si %xmm2, %esi
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 0f 2c f2
; Unary arithmetic.
; asm: sqrtss %xmm5, %xmm2
@@ -267,6 +274,13 @@ ebb0:
; asm: xorps %xmm5, %xmm2
[-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5
; Convert float to int. (No i64 dest on i386).
; asm: cvttsd2si %xmm5, %ecx
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd
; asm: cvttsd2si %xmm2, %esi
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 0f 2c f2
; Unary arithmetic.
; asm: sqrtsd %xmm5, %xmm2

View File

@@ -96,6 +96,18 @@ ebb0:
; asm: movaps %xmm5, %xmm10
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
; Convert float to int.
; asm: cvttss2si %xmm5, %ecx
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd
; asm: cvttss2si %xmm10, %esi
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2
; asm: cvttss2si %xmm5, %rcx
[-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd
; asm: cvttss2si %xmm10, %rsi
[-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2
; Unary arithmetic.
; asm: sqrtss %xmm5, %xmm10
@@ -291,6 +303,18 @@ ebb0:
; asm: movaps %xmm5, %xmm10
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
; Convert float to int.
; asm: cvttsd2si %xmm5, %ecx
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd
; asm: cvttsd2si %xmm10, %esi
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2
; asm: cvttsd2si %xmm5, %rcx
[-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd
; asm: cvttsd2si %xmm10, %rsi
[-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2
; Unary arithmetic.
; asm: sqrtsd %xmm5, %xmm10

View File

@@ -371,6 +371,14 @@ enc_flt(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a)
# cvtsd2ss
enc_flt(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
# cvttss2si
# Truncating conversion from an f32 source (opcode bytes f3 0f 2c) using the
# FPR -> GPR `rfurm` recipe. The i64-result form is registered on I64 only,
# through the recipe's REX variant with w=1.
# NOTE(review): enc_flt is defined outside this excerpt; presumably it
# registers the encoding for every ISA mode that supports it -- confirm.
enc_flt(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
I64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
# cvttsd2si
# Same pair of encodings for an f64 source; only the leading opcode byte
# differs (f2 instead of f3).
enc_flt(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
I64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
# Exact square roots.
enc_flt(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
enc_flt(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)

View File

@@ -46,4 +46,27 @@ sdivmodx = Instruction(
""",
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
# Type variable for the conversion source: floating point types, with SIMD
# vector types allowed (simd=True).
Float = TypeVar(
'Float', 'A scalar or vector floating point number',
floats=True, simd=True)
# Type variable for the conversion result: integer types, with SIMD vector
# types allowed.
# NOTE(review): ints=(32, 64) presumably is an inclusive (min, max) lane-width
# range, i.e. i32 and i64 only -- confirm against TypeVar.
# NOTE(review): nothing visible here ties IntTo's lane count to Float's; the
# "same number of lanes" wording is descriptive only -- confirm.
IntTo = TypeVar(
'IntTo', 'An integer type with the same number of lanes',
ints=(32, 64), simd=True)
# Operands of x86_cvtt2si: `x` is the floating point input and `a` is the
# integer result.
x = Operand('x', Float)
a = Operand('a', IntTo)
# Truncating float-to-signed-integer conversion with one input and one output.
# Unlike sdivmodx, no can_trap=True is passed, which is consistent with the
# description's statement that the instruction does not trap.
cvtt2si = Instruction(
'x86_cvtt2si', r"""
Convert with truncation floating point to signed integer.
The source floating point operand is converted to a signed integer by
rounding towards zero. If the result can't be represented in the output
type, returns the smallest signed value the output type can represent.
This instruction does not trap.
""",
ins=x, outs=a)
GROUP.close()

View File

@@ -290,6 +290,14 @@ frurm = TailRecipe(
modrm_rr(in_reg0, out_reg0, sink);
''')
# XX /r, RM form, FPR -> GPR.
# Unary recipe whose input is constrained to a floating point register and
# whose result is constrained to a general purpose register. The emit template
# writes the opcode via PUT_OP, passing rex2(in_reg0, out_reg0), and then a
# register-register ModR/M byte built from the same two registers.
# NOTE(review): size=1 presumably counts the single ModR/M byte that follows
# the opcode -- confirm against TailRecipe.
rfurm = TailRecipe(
'rfurm', Unary, size=1, ins=FPR, outs=GPR,
emit='''
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_rr(in_reg0, out_reg0, sink);
''')
# XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
furmi_rnd = TailRecipe(
'furmi_rnd', Unary, size=2, ins=FPR, outs=FPR,