Add an Intel-specific x86_cvtt2si instruction.
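This instruction represents the non-trapping semantics of the cvttss2si and cvttsd2si instructions (and their vectorized counterparts). Its overflow behavior is specific to the Intel ISAs: when the truncated result cannot be represented in the output type, it returns the smallest signed value of that type instead of trapping. The 32-bit Intel ISA has no float-to-i64 form of these instructions, so the i64 result type is only encoded for the 64-bit ISA.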
This commit is contained in:
@@ -87,6 +87,13 @@ ebb0:
|
|||||||
; asm: xorps %xmm5, %xmm2
|
; asm: xorps %xmm5, %xmm2
|
||||||
[-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5
|
[-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5
|
||||||
|
|
||||||
|
; Convert float to int. (No i64 dest on i386).
|
||||||
|
|
||||||
|
; asm: cvttss2si %xmm5, %ecx
|
||||||
|
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd
|
||||||
|
; asm: cvttss2si %xmm2, %esi
|
||||||
|
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 0f 2c f2
|
||||||
|
|
||||||
; Unary arithmetic.
|
; Unary arithmetic.
|
||||||
|
|
||||||
; asm: sqrtss %xmm5, %xmm2
|
; asm: sqrtss %xmm5, %xmm2
|
||||||
@@ -267,6 +274,13 @@ ebb0:
|
|||||||
; asm: xorps %xmm5, %xmm2
|
; asm: xorps %xmm5, %xmm2
|
||||||
[-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5
|
[-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5
|
||||||
|
|
||||||
|
; Convert float to int. (No i64 dest on i386).
|
||||||
|
|
||||||
|
; asm: cvttsd2si %xmm5, %ecx
|
||||||
|
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd
|
||||||
|
; asm: cvttsd2si %xmm2, %esi
|
||||||
|
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 0f 2c f2
|
||||||
|
|
||||||
; Unary arithmetic.
|
; Unary arithmetic.
|
||||||
|
|
||||||
; asm: sqrtsd %xmm5, %xmm2
|
; asm: sqrtsd %xmm5, %xmm2
|
||||||
|
|||||||
@@ -96,6 +96,18 @@ ebb0:
|
|||||||
; asm: movaps %xmm5, %xmm10
|
; asm: movaps %xmm5, %xmm10
|
||||||
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
|
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
|
||||||
|
|
||||||
|
; Convert float to int.
|
||||||
|
|
||||||
|
; asm: cvttss2si %xmm5, %ecx
|
||||||
|
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd
|
||||||
|
; asm: cvttss2si %xmm10, %esi
|
||||||
|
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2
|
||||||
|
|
||||||
|
; asm: cvttss2si %xmm5, %rcx
|
||||||
|
[-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd
|
||||||
|
; asm: cvttss2si %xmm10, %rsi
|
||||||
|
[-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2
|
||||||
|
|
||||||
; Unary arithmetic.
|
; Unary arithmetic.
|
||||||
|
|
||||||
; asm: sqrtss %xmm5, %xmm10
|
; asm: sqrtss %xmm5, %xmm10
|
||||||
@@ -291,6 +303,18 @@ ebb0:
|
|||||||
; asm: movaps %xmm5, %xmm10
|
; asm: movaps %xmm5, %xmm10
|
||||||
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
|
[-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5
|
||||||
|
|
||||||
|
; Convert float to int.
|
||||||
|
|
||||||
|
; asm: cvttsd2si %xmm5, %ecx
|
||||||
|
[-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd
|
||||||
|
; asm: cvttsd2si %xmm10, %esi
|
||||||
|
[-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2
|
||||||
|
|
||||||
|
; asm: cvttsd2si %xmm5, %rcx
|
||||||
|
[-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd
|
||||||
|
; asm: cvttsd2si %xmm10, %rsi
|
||||||
|
[-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2
|
||||||
|
|
||||||
; Unary arithmetic.
|
; Unary arithmetic.
|
||||||
|
|
||||||
; asm: sqrtsd %xmm5, %xmm10
|
; asm: sqrtsd %xmm5, %xmm10
|
||||||
|
|||||||
@@ -371,6 +371,14 @@ enc_flt(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a)
|
|||||||
# cvtsd2ss
|
# cvtsd2ss
|
||||||
enc_flt(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
|
enc_flt(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
|
||||||
|
|
||||||
|
# cvttss2si
|
||||||
|
enc_flt(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
|
||||||
|
I64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
|
||||||
|
|
||||||
|
# cvttsd2si
|
||||||
|
enc_flt(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
|
||||||
|
I64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
|
||||||
|
|
||||||
# Exact square roots.
|
# Exact square roots.
|
||||||
enc_flt(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
|
enc_flt(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
|
||||||
enc_flt(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
|
enc_flt(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
|
||||||
|
|||||||
@@ -46,4 +46,27 @@ sdivmodx = Instruction(
|
|||||||
""",
|
""",
|
||||||
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
|
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
|
||||||
|
|
||||||
|
|
||||||
|
Float = TypeVar(
|
||||||
|
'Float', 'A scalar or vector floating point number',
|
||||||
|
floats=True, simd=True)
|
||||||
|
IntTo = TypeVar(
|
||||||
|
'IntTo', 'An integer type with the same number of lanes',
|
||||||
|
ints=(32, 64), simd=True)
|
||||||
|
|
||||||
|
x = Operand('x', Float)
|
||||||
|
a = Operand('a', IntTo)
|
||||||
|
|
||||||
|
cvtt2si = Instruction(
|
||||||
|
'x86_cvtt2si', r"""
|
||||||
|
Convert with truncation floating point to signed integer.
|
||||||
|
|
||||||
|
The source floating point operand is converted to a signed integer by
|
||||||
|
rounding towards zero. If the result can't be represented in the output
|
||||||
|
type, returns the smallest signed value the output type can represent.
|
||||||
|
|
||||||
|
This instruction does not trap.
|
||||||
|
""",
|
||||||
|
ins=x, outs=a)
|
||||||
|
|
||||||
GROUP.close()
|
GROUP.close()
|
||||||
|
|||||||
@@ -290,6 +290,14 @@ frurm = TailRecipe(
|
|||||||
modrm_rr(in_reg0, out_reg0, sink);
|
modrm_rr(in_reg0, out_reg0, sink);
|
||||||
''')
|
''')
|
||||||
|
|
||||||
|
# XX /r, RM form, FPR -> GPR.
|
||||||
|
rfurm = TailRecipe(
|
||||||
|
'rfurm', Unary, size=1, ins=FPR, outs=GPR,
|
||||||
|
emit='''
|
||||||
|
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
|
||||||
|
modrm_rr(in_reg0, out_reg0, sink);
|
||||||
|
''')
|
||||||
|
|
||||||
# XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
|
# XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
|
||||||
furmi_rnd = TailRecipe(
|
furmi_rnd = TailRecipe(
|
||||||
'furmi_rnd', Unary, size=2, ins=FPR, outs=FPR,
|
'furmi_rnd', Unary, size=2, ins=FPR, outs=FPR,
|
||||||
|
|||||||
Reference in New Issue
Block a user