x64: Fill out more AVX instructions (#5849)
* x64: Fill out more AVX instructions

  This commit fills out more AVX instructions for the SSE counterparts that are currently used. Many of these instructions do not benefit from the 3-operand form that AVX uses; instead they benefit from being able to use `XmmMem` instead of `XmmMemAligned`, which may avoid some extra temporary registers in some cases.

* Review comments
This commit is contained in:
@@ -283,6 +283,17 @@
|
|||||||
(mask Xmm)
|
(mask Xmm)
|
||||||
(dst WritableXmm))
|
(dst WritableXmm))
|
||||||
|
|
||||||
|
;; XMM unary op using a VEX encoding (aka AVX).
|
||||||
|
(XmmUnaryRmRVex (op AvxOpcode)
|
||||||
|
(src XmmMem)
|
||||||
|
(dst WritableXmm))
|
||||||
|
|
||||||
|
;; XMM unary op using a VEX encoding (aka AVX) with an immediate.
|
||||||
|
(XmmUnaryRmRImmVex (op AvxOpcode)
|
||||||
|
(src XmmMem)
|
||||||
|
(dst WritableXmm)
|
||||||
|
(imm u8))
|
||||||
|
|
||||||
;; XMM (scalar or vector) binary op that relies on the EVEX
|
;; XMM (scalar or vector) binary op that relies on the EVEX
|
||||||
;; prefix. Takes two inputs.
|
;; prefix. Takes two inputs.
|
||||||
(XmmRmREvex (op Avx512Opcode)
|
(XmmRmREvex (op Avx512Opcode)
|
||||||
@@ -1314,6 +1325,37 @@
|
|||||||
Vpsllq
|
Vpsllq
|
||||||
Vpsraw
|
Vpsraw
|
||||||
Vpsrad
|
Vpsrad
|
||||||
|
Vpmovsxbw
|
||||||
|
Vpmovzxbw
|
||||||
|
Vpmovsxwd
|
||||||
|
Vpmovzxwd
|
||||||
|
Vpmovsxdq
|
||||||
|
Vpmovzxdq
|
||||||
|
Vaddss
|
||||||
|
Vaddsd
|
||||||
|
Vmulss
|
||||||
|
Vmulsd
|
||||||
|
Vsubss
|
||||||
|
Vsubsd
|
||||||
|
Vdivss
|
||||||
|
Vdivsd
|
||||||
|
Vpabsb
|
||||||
|
Vpabsw
|
||||||
|
Vpabsd
|
||||||
|
Vminss
|
||||||
|
Vminsd
|
||||||
|
Vmaxss
|
||||||
|
Vmaxsd
|
||||||
|
Vsqrtps
|
||||||
|
Vsqrtpd
|
||||||
|
Vroundps
|
||||||
|
Vroundpd
|
||||||
|
Vcvtdq2pd
|
||||||
|
Vcvtdq2ps
|
||||||
|
Vcvtpd2ps
|
||||||
|
Vcvtps2pd
|
||||||
|
Vcvttpd2dq
|
||||||
|
Vcvttps2dq
|
||||||
))
|
))
|
||||||
|
|
||||||
(type Avx512Opcode extern
|
(type Avx512Opcode extern
|
||||||
@@ -1902,33 +1944,47 @@
|
|||||||
(rule (x64_movdqu from)
|
(rule (x64_movdqu from)
|
||||||
(xmm_unary_rm_r_unaligned (SseOpcode.Movdqu) from))
|
(xmm_unary_rm_r_unaligned (SseOpcode.Movdqu) from))
|
||||||
|
|
||||||
(decl x64_movapd (XmmMem) Xmm)
|
|
||||||
(rule (x64_movapd src)
|
|
||||||
(xmm_unary_rm_r (SseOpcode.Movapd) src))
|
|
||||||
|
|
||||||
(decl x64_pmovsxbw (XmmMem) Xmm)
|
(decl x64_pmovsxbw (XmmMem) Xmm)
|
||||||
(rule (x64_pmovsxbw from)
|
(rule (x64_pmovsxbw from)
|
||||||
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxbw) from))
|
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxbw) from))
|
||||||
|
(rule 1 (x64_pmovsxbw from)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxbw) from))
|
||||||
|
|
||||||
(decl x64_pmovzxbw (XmmMem) Xmm)
|
(decl x64_pmovzxbw (XmmMem) Xmm)
|
||||||
(rule (x64_pmovzxbw from)
|
(rule (x64_pmovzxbw from)
|
||||||
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxbw) from))
|
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxbw) from))
|
||||||
|
(rule 1 (x64_pmovzxbw from)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxbw) from))
|
||||||
|
|
||||||
(decl x64_pmovsxwd (XmmMem) Xmm)
|
(decl x64_pmovsxwd (XmmMem) Xmm)
|
||||||
(rule (x64_pmovsxwd from)
|
(rule (x64_pmovsxwd from)
|
||||||
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxwd) from))
|
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxwd) from))
|
||||||
|
(rule 1 (x64_pmovsxwd from)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxwd) from))
|
||||||
|
|
||||||
(decl x64_pmovzxwd (XmmMem) Xmm)
|
(decl x64_pmovzxwd (XmmMem) Xmm)
|
||||||
(rule (x64_pmovzxwd from)
|
(rule (x64_pmovzxwd from)
|
||||||
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxwd) from))
|
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxwd) from))
|
||||||
|
(rule 1 (x64_pmovzxwd from)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxwd) from))
|
||||||
|
|
||||||
(decl x64_pmovsxdq (XmmMem) Xmm)
|
(decl x64_pmovsxdq (XmmMem) Xmm)
|
||||||
(rule (x64_pmovsxdq from)
|
(rule (x64_pmovsxdq from)
|
||||||
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxdq) from))
|
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxdq) from))
|
||||||
|
(rule 1 (x64_pmovsxdq from)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxdq) from))
|
||||||
|
|
||||||
(decl x64_pmovzxdq (XmmMem) Xmm)
|
(decl x64_pmovzxdq (XmmMem) Xmm)
|
||||||
(rule (x64_pmovzxdq from)
|
(rule (x64_pmovzxdq from)
|
||||||
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxdq) from))
|
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxdq) from))
|
||||||
|
(rule 1 (x64_pmovzxdq from)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxdq) from))
|
||||||
|
|
||||||
(decl x64_movrm (Type SyntheticAmode Gpr) SideEffectNoResult)
|
(decl x64_movrm (Type SyntheticAmode Gpr) SideEffectNoResult)
|
||||||
(rule (x64_movrm ty addr data)
|
(rule (x64_movrm ty addr data)
|
||||||
@@ -2702,11 +2758,17 @@
|
|||||||
(decl x64_addss (Xmm XmmMem) Xmm)
|
(decl x64_addss (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_addss src1 src2)
|
(rule (x64_addss src1 src2)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Addss) src1 src2))
|
(xmm_rm_r_unaligned (SseOpcode.Addss) src1 src2))
|
||||||
|
(rule 1 (x64_addss src1 src2)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vaddss) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `addsd` instructions.
|
;; Helper for creating `addsd` instructions.
|
||||||
(decl x64_addsd (Xmm XmmMem) Xmm)
|
(decl x64_addsd (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_addsd src1 src2)
|
(rule (x64_addsd src1 src2)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Addsd) src1 src2))
|
(xmm_rm_r_unaligned (SseOpcode.Addsd) src1 src2))
|
||||||
|
(rule 1 (x64_addsd src1 src2)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vaddsd) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `addps` instructions.
|
;; Helper for creating `addps` instructions.
|
||||||
(decl x64_addps (Xmm XmmMem) Xmm)
|
(decl x64_addps (Xmm XmmMem) Xmm)
|
||||||
@@ -2728,11 +2790,17 @@
|
|||||||
(decl x64_subss (Xmm XmmMem) Xmm)
|
(decl x64_subss (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_subss src1 src2)
|
(rule (x64_subss src1 src2)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Subss) src1 src2))
|
(xmm_rm_r_unaligned (SseOpcode.Subss) src1 src2))
|
||||||
|
(rule 1 (x64_subss src1 src2)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vsubss) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `subsd` instructions.
|
;; Helper for creating `subsd` instructions.
|
||||||
(decl x64_subsd (Xmm XmmMem) Xmm)
|
(decl x64_subsd (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_subsd src1 src2)
|
(rule (x64_subsd src1 src2)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Subsd) src1 src2))
|
(xmm_rm_r_unaligned (SseOpcode.Subsd) src1 src2))
|
||||||
|
(rule 1 (x64_subsd src1 src2)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vsubsd) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `subps` instructions.
|
;; Helper for creating `subps` instructions.
|
||||||
(decl x64_subps (Xmm XmmMem) Xmm)
|
(decl x64_subps (Xmm XmmMem) Xmm)
|
||||||
@@ -2754,11 +2822,17 @@
|
|||||||
(decl x64_mulss (Xmm XmmMem) Xmm)
|
(decl x64_mulss (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_mulss src1 src2)
|
(rule (x64_mulss src1 src2)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Mulss) src1 src2))
|
(xmm_rm_r_unaligned (SseOpcode.Mulss) src1 src2))
|
||||||
|
(rule 1 (x64_mulss src1 src2)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vmulss) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `mulsd` instructions.
|
;; Helper for creating `mulsd` instructions.
|
||||||
(decl x64_mulsd (Xmm XmmMem) Xmm)
|
(decl x64_mulsd (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_mulsd src1 src2)
|
(rule (x64_mulsd src1 src2)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Mulsd) src1 src2))
|
(xmm_rm_r_unaligned (SseOpcode.Mulsd) src1 src2))
|
||||||
|
(rule 1 (x64_mulsd src1 src2)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vmulsd) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `mulps` instructions.
|
;; Helper for creating `mulps` instructions.
|
||||||
(decl x64_mulps (Xmm XmmMem) Xmm)
|
(decl x64_mulps (Xmm XmmMem) Xmm)
|
||||||
@@ -2780,11 +2854,17 @@
|
|||||||
(decl x64_divss (Xmm XmmMem) Xmm)
|
(decl x64_divss (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_divss src1 src2)
|
(rule (x64_divss src1 src2)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Divss) src1 src2))
|
(xmm_rm_r_unaligned (SseOpcode.Divss) src1 src2))
|
||||||
|
(rule 1 (x64_divss src1 src2)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vdivss) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `divsd` instructions.
|
;; Helper for creating `divsd` instructions.
|
||||||
(decl x64_divsd (Xmm XmmMem) Xmm)
|
(decl x64_divsd (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_divsd src1 src2)
|
(rule (x64_divsd src1 src2)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Divsd) src1 src2))
|
(xmm_rm_r_unaligned (SseOpcode.Divsd) src1 src2))
|
||||||
|
(rule 1 (x64_divsd src1 src2)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vdivsd) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `divps` instructions.
|
;; Helper for creating `divps` instructions.
|
||||||
(decl x64_divps (Xmm XmmMem) Xmm)
|
(decl x64_divps (Xmm XmmMem) Xmm)
|
||||||
@@ -2816,6 +2896,20 @@
|
|||||||
(_ Unit (emit (MInst.XmmRmRBlendVex op src1 src2 mask dst))))
|
(_ Unit (emit (MInst.XmmRmRBlendVex op src1 src2 mask dst))))
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
|
;; Helper for creating `XmmUnaryRmRVex` instructions.
;;
;; Emits the instruction for `opcode` reading from `input` into a freshly
;; created temporary XMM register, and evaluates to that register.
(decl xmm_unary_rm_r_vex (AvxOpcode XmmMem) Xmm)
(rule (xmm_unary_rm_r_vex opcode input)
      (let ((result WritableXmm (temp_writable_xmm))
            (_ Unit (emit (MInst.XmmUnaryRmRVex opcode input result))))
        result))
|
||||||
|
|
||||||
|
;; Helper for creating `XmmUnaryRmRImmVex` instructions.
;;
;; Emits the instruction for `opcode` reading from `input` with the 8-bit
;; immediate `immediate` into a freshly created temporary XMM register, and
;; evaluates to that register.
(decl xmm_unary_rm_r_imm_vex (AvxOpcode XmmMem u8) Xmm)
(rule (xmm_unary_rm_r_imm_vex opcode input immediate)
      (let ((result WritableXmm (temp_writable_xmm))
            (_ Unit (emit (MInst.XmmUnaryRmRImmVex opcode input result immediate))))
        result))
|
||||||
|
|
||||||
;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions.
|
;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions.
|
||||||
(decl x64_blend (Type Xmm XmmMem Xmm) Xmm)
|
(decl x64_blend (Type Xmm XmmMem Xmm) Xmm)
|
||||||
(rule 1 (x64_blend $F32X4 mask src1 src2) (x64_blendvps src2 src1 mask))
|
(rule 1 (x64_blend $F32X4 mask src1 src2) (x64_blendvps src2 src1 mask))
|
||||||
@@ -3131,11 +3225,17 @@
|
|||||||
(decl x64_roundps (XmmMem RoundImm) Xmm)
|
(decl x64_roundps (XmmMem RoundImm) Xmm)
|
||||||
(rule (x64_roundps src1 round)
|
(rule (x64_roundps src1 round)
|
||||||
(xmm_unary_rm_r_imm (SseOpcode.Roundps) src1 (encode_round_imm round)))
|
(xmm_unary_rm_r_imm (SseOpcode.Roundps) src1 (encode_round_imm round)))
|
||||||
|
(rule 1 (x64_roundps src1 round)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_imm_vex (AvxOpcode.Vroundps) src1 (encode_round_imm round)))
|
||||||
|
|
||||||
;; Helper for creating `roundpd` instructions.
|
;; Helper for creating `roundpd` instructions.
|
||||||
(decl x64_roundpd (XmmMem RoundImm) Xmm)
|
(decl x64_roundpd (XmmMem RoundImm) Xmm)
|
||||||
(rule (x64_roundpd src1 round)
|
(rule (x64_roundpd src1 round)
|
||||||
(xmm_unary_rm_r_imm (SseOpcode.Roundpd) src1 (encode_round_imm round)))
|
(xmm_unary_rm_r_imm (SseOpcode.Roundpd) src1 (encode_round_imm round)))
|
||||||
|
(rule 1 (x64_roundpd src1 round)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_imm_vex (AvxOpcode.Vroundpd) src1 (encode_round_imm round)))
|
||||||
|
|
||||||
;; Helper for creating `pmaddwd` instructions.
|
;; Helper for creating `pmaddwd` instructions.
|
||||||
(decl x64_pmaddwd (Xmm XmmMem) Xmm)
|
(decl x64_pmaddwd (Xmm XmmMem) Xmm)
|
||||||
@@ -3207,16 +3307,25 @@
|
|||||||
(decl x64_pabsb (XmmMem) Xmm)
|
(decl x64_pabsb (XmmMem) Xmm)
|
||||||
(rule (x64_pabsb src)
|
(rule (x64_pabsb src)
|
||||||
(xmm_unary_rm_r (SseOpcode.Pabsb) src))
|
(xmm_unary_rm_r (SseOpcode.Pabsb) src))
|
||||||
|
(rule 1 (x64_pabsb src)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpabsb) src))
|
||||||
|
|
||||||
;; Helper for creating `pabsw` instructions.
|
;; Helper for creating `pabsw` instructions.
|
||||||
(decl x64_pabsw (XmmMem) Xmm)
|
(decl x64_pabsw (XmmMem) Xmm)
|
||||||
(rule (x64_pabsw src)
|
(rule (x64_pabsw src)
|
||||||
(xmm_unary_rm_r (SseOpcode.Pabsw) src))
|
(xmm_unary_rm_r (SseOpcode.Pabsw) src))
|
||||||
|
(rule 1 (x64_pabsw src)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpabsw) src))
|
||||||
|
|
||||||
;; Helper for creating `pabsd` instructions.
|
;; Helper for creating `pabsd` instructions.
|
||||||
(decl x64_pabsd (XmmMem) Xmm)
|
(decl x64_pabsd (XmmMem) Xmm)
|
||||||
(rule (x64_pabsd src)
|
(rule (x64_pabsd src)
|
||||||
(xmm_unary_rm_r (SseOpcode.Pabsd) src))
|
(xmm_unary_rm_r (SseOpcode.Pabsd) src))
|
||||||
|
(rule 1 (x64_pabsd src)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vpabsd) src))
|
||||||
|
|
||||||
;; Helper for creating `MInst.XmmUnaryRmREvex` instructions.
|
;; Helper for creating `MInst.XmmUnaryRmREvex` instructions.
|
||||||
(decl xmm_unary_rm_r_evex (Avx512Opcode XmmMem) Xmm)
|
(decl xmm_unary_rm_r_evex (Avx512Opcode XmmMem) Xmm)
|
||||||
@@ -3540,11 +3649,17 @@
|
|||||||
(decl x64_minss (Xmm XmmMem) Xmm)
|
(decl x64_minss (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_minss x y)
|
(rule (x64_minss x y)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Minss) x y))
|
(xmm_rm_r_unaligned (SseOpcode.Minss) x y))
|
||||||
|
(rule 1 (x64_minss x y)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vminss) x y))
|
||||||
|
|
||||||
;; Helper for creating `minsd` instructions.
|
;; Helper for creating `minsd` instructions.
|
||||||
(decl x64_minsd (Xmm XmmMem) Xmm)
|
(decl x64_minsd (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_minsd x y)
|
(rule (x64_minsd x y)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Minsd) x y))
|
(xmm_rm_r_unaligned (SseOpcode.Minsd) x y))
|
||||||
|
(rule 1 (x64_minsd x y)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vminsd) x y))
|
||||||
|
|
||||||
;; Helper for creating `minps` instructions.
|
;; Helper for creating `minps` instructions.
|
||||||
(decl x64_minps (Xmm XmmMem) Xmm)
|
(decl x64_minps (Xmm XmmMem) Xmm)
|
||||||
@@ -3566,11 +3681,17 @@
|
|||||||
(decl x64_maxss (Xmm XmmMem) Xmm)
|
(decl x64_maxss (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_maxss x y)
|
(rule (x64_maxss x y)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Maxss) x y))
|
(xmm_rm_r_unaligned (SseOpcode.Maxss) x y))
|
||||||
|
(rule 1 (x64_maxss x y)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vmaxss) x y))
|
||||||
|
|
||||||
;; Helper for creating `maxsd` instructions.
|
;; Helper for creating `maxsd` instructions.
|
||||||
(decl x64_maxsd (Xmm XmmMem) Xmm)
|
(decl x64_maxsd (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_maxsd x y)
|
(rule (x64_maxsd x y)
|
||||||
(xmm_rm_r_unaligned (SseOpcode.Maxsd) x y))
|
(xmm_rm_r_unaligned (SseOpcode.Maxsd) x y))
|
||||||
|
(rule 1 (x64_maxsd x y)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_rmir_vex (AvxOpcode.Vmaxsd) x y))
|
||||||
|
|
||||||
;; Helper for creating `maxps` instructions.
|
;; Helper for creating `maxps` instructions.
|
||||||
(decl x64_maxps (Xmm XmmMem) Xmm)
|
(decl x64_maxps (Xmm XmmMem) Xmm)
|
||||||
@@ -3649,10 +3770,16 @@
|
|||||||
;; Helper for creating `sqrtps` instructions.
|
;; Helper for creating `sqrtps` instructions.
|
||||||
(decl x64_sqrtps (XmmMem) Xmm)
|
(decl x64_sqrtps (XmmMem) Xmm)
|
||||||
(rule (x64_sqrtps x) (xmm_unary_rm_r (SseOpcode.Sqrtps) x))
|
(rule (x64_sqrtps x) (xmm_unary_rm_r (SseOpcode.Sqrtps) x))
|
||||||
|
(rule 1 (x64_sqrtps x)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vsqrtps) x))
|
||||||
|
|
||||||
;; Helper for creating `sqrtpd` instructions.
|
;; Helper for creating `sqrtpd` instructions.
|
||||||
(decl x64_sqrtpd (XmmMem) Xmm)
|
(decl x64_sqrtpd (XmmMem) Xmm)
|
||||||
(rule (x64_sqrtpd x) (xmm_unary_rm_r (SseOpcode.Sqrtpd) x))
|
(rule (x64_sqrtpd x) (xmm_unary_rm_r (SseOpcode.Sqrtpd) x))
|
||||||
|
(rule 1 (x64_sqrtpd x)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vsqrtpd) x))
|
||||||
|
|
||||||
;; Helper for creating `cvtss2sd` instructions.
|
;; Helper for creating `cvtss2sd` instructions.
|
||||||
(decl x64_cvtss2sd (Xmm) Xmm)
|
(decl x64_cvtss2sd (Xmm) Xmm)
|
||||||
@@ -3665,18 +3792,30 @@
|
|||||||
;; Helper for creating `cvtdq2ps` instructions.
|
;; Helper for creating `cvtdq2ps` instructions.
|
||||||
(decl x64_cvtdq2ps (XmmMem) Xmm)
|
(decl x64_cvtdq2ps (XmmMem) Xmm)
|
||||||
(rule (x64_cvtdq2ps x) (xmm_unary_rm_r (SseOpcode.Cvtdq2ps) x))
|
(rule (x64_cvtdq2ps x) (xmm_unary_rm_r (SseOpcode.Cvtdq2ps) x))
|
||||||
|
(rule 1 (x64_cvtdq2ps x)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2ps) x))
|
||||||
|
|
||||||
;; Helper for creating `cvtps2pd` instructions.
|
;; Helper for creating `cvtps2pd` instructions.
|
||||||
(decl x64_cvtps2pd (XmmMem) Xmm)
|
(decl x64_cvtps2pd (XmmMem) Xmm)
|
||||||
(rule (x64_cvtps2pd x) (xmm_unary_rm_r (SseOpcode.Cvtps2pd) x))
|
(rule (x64_cvtps2pd x) (xmm_unary_rm_r (SseOpcode.Cvtps2pd) x))
|
||||||
|
(rule 1 (x64_cvtps2pd x)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtps2pd) x))
|
||||||
|
|
||||||
;; Helper for creating `cvtpd2ps` instructions.
|
;; Helper for creating `cvtpd2ps` instructions.
|
||||||
(decl x64_cvtpd2ps (XmmMem) Xmm)
|
(decl x64_cvtpd2ps (XmmMem) Xmm)
|
||||||
(rule (x64_cvtpd2ps x) (xmm_unary_rm_r (SseOpcode.Cvtpd2ps) x))
|
(rule (x64_cvtpd2ps x) (xmm_unary_rm_r (SseOpcode.Cvtpd2ps) x))
|
||||||
|
(rule 1 (x64_cvtpd2ps x)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtpd2ps) x))
|
||||||
|
|
||||||
;; Helper for creating `cvtdq2pd` instructions.
|
;; Helper for creating `cvtdq2pd` instructions.
|
||||||
(decl x64_cvtdq2pd (XmmMem) Xmm)
|
(decl x64_cvtdq2pd (XmmMem) Xmm)
|
||||||
(rule (x64_cvtdq2pd x) (xmm_unary_rm_r (SseOpcode.Cvtdq2pd) x))
|
(rule (x64_cvtdq2pd x) (xmm_unary_rm_r (SseOpcode.Cvtdq2pd) x))
|
||||||
|
(rule 1 (x64_cvtdq2pd x)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2pd) x))
|
||||||
|
|
||||||
;; Helper for creating `cvtsi2ss` instructions.
|
;; Helper for creating `cvtsi2ss` instructions.
|
||||||
(decl x64_cvtsi2ss (Type GprMem) Xmm)
|
(decl x64_cvtsi2ss (Type GprMem) Xmm)
|
||||||
@@ -3692,11 +3831,17 @@
|
|||||||
(decl x64_cvttps2dq (XmmMem) Xmm)
|
(decl x64_cvttps2dq (XmmMem) Xmm)
|
||||||
(rule (x64_cvttps2dq x)
|
(rule (x64_cvttps2dq x)
|
||||||
(xmm_unary_rm_r (SseOpcode.Cvttps2dq) x))
|
(xmm_unary_rm_r (SseOpcode.Cvttps2dq) x))
|
||||||
|
(rule 1 (x64_cvttps2dq x)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vcvttps2dq) x))
|
||||||
|
|
||||||
;; Helper for creating `cvttpd2dq` instructions.
|
;; Helper for creating `cvttpd2dq` instructions.
|
||||||
(decl x64_cvttpd2dq (XmmMem) Xmm)
|
(decl x64_cvttpd2dq (XmmMem) Xmm)
|
||||||
(rule (x64_cvttpd2dq x)
|
(rule (x64_cvttpd2dq x)
|
||||||
(xmm_unary_rm_r (SseOpcode.Cvttpd2dq) x))
|
(xmm_unary_rm_r (SseOpcode.Cvttpd2dq) x))
|
||||||
|
(rule 1 (x64_cvttpd2dq x)
|
||||||
|
(if-let $true (has_avx))
|
||||||
|
(xmm_unary_rm_r_vex (AvxOpcode.Vcvttpd2dq) x))
|
||||||
|
|
||||||
(decl cvt_u64_to_float_seq (Type Gpr) Xmm)
|
(decl cvt_u64_to_float_seq (Type Gpr) Xmm)
|
||||||
(rule (cvt_u64_to_float_seq ty src)
|
(rule (cvt_u64_to_float_seq ty src)
|
||||||
|
|||||||
@@ -1630,7 +1630,38 @@ impl AvxOpcode {
|
|||||||
| AvxOpcode::Vpslld
|
| AvxOpcode::Vpslld
|
||||||
| AvxOpcode::Vpsllq
|
| AvxOpcode::Vpsllq
|
||||||
| AvxOpcode::Vpsraw
|
| AvxOpcode::Vpsraw
|
||||||
| AvxOpcode::Vpsrad => {
|
| AvxOpcode::Vpsrad
|
||||||
|
| AvxOpcode::Vpmovsxbw
|
||||||
|
| AvxOpcode::Vpmovzxbw
|
||||||
|
| AvxOpcode::Vpmovsxwd
|
||||||
|
| AvxOpcode::Vpmovzxwd
|
||||||
|
| AvxOpcode::Vpmovsxdq
|
||||||
|
| AvxOpcode::Vpmovzxdq
|
||||||
|
| AvxOpcode::Vaddss
|
||||||
|
| AvxOpcode::Vaddsd
|
||||||
|
| AvxOpcode::Vmulss
|
||||||
|
| AvxOpcode::Vmulsd
|
||||||
|
| AvxOpcode::Vsubss
|
||||||
|
| AvxOpcode::Vsubsd
|
||||||
|
| AvxOpcode::Vdivss
|
||||||
|
| AvxOpcode::Vdivsd
|
||||||
|
| AvxOpcode::Vpabsb
|
||||||
|
| AvxOpcode::Vpabsw
|
||||||
|
| AvxOpcode::Vpabsd
|
||||||
|
| AvxOpcode::Vminss
|
||||||
|
| AvxOpcode::Vminsd
|
||||||
|
| AvxOpcode::Vmaxss
|
||||||
|
| AvxOpcode::Vmaxsd
|
||||||
|
| AvxOpcode::Vsqrtps
|
||||||
|
| AvxOpcode::Vsqrtpd
|
||||||
|
| AvxOpcode::Vroundpd
|
||||||
|
| AvxOpcode::Vroundps
|
||||||
|
| AvxOpcode::Vcvtdq2pd
|
||||||
|
| AvxOpcode::Vcvtdq2ps
|
||||||
|
| AvxOpcode::Vcvtpd2ps
|
||||||
|
| AvxOpcode::Vcvtps2pd
|
||||||
|
| AvxOpcode::Vcvttpd2dq
|
||||||
|
| AvxOpcode::Vcvttps2dq => {
|
||||||
smallvec![InstructionSet::AVX]
|
smallvec![InstructionSet::AVX]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2182,6 +2182,18 @@ pub(crate) fn emit(
|
|||||||
AvxOpcode::Vpsllq => (LP::_66, OM::_0F, 0xF3),
|
AvxOpcode::Vpsllq => (LP::_66, OM::_0F, 0xF3),
|
||||||
AvxOpcode::Vpsraw => (LP::_66, OM::_0F, 0xE1),
|
AvxOpcode::Vpsraw => (LP::_66, OM::_0F, 0xE1),
|
||||||
AvxOpcode::Vpsrad => (LP::_66, OM::_0F, 0xE2),
|
AvxOpcode::Vpsrad => (LP::_66, OM::_0F, 0xE2),
|
||||||
|
AvxOpcode::Vaddss => (LP::_F3, OM::_0F, 0x58),
|
||||||
|
AvxOpcode::Vaddsd => (LP::_F2, OM::_0F, 0x58),
|
||||||
|
AvxOpcode::Vmulss => (LP::_F3, OM::_0F, 0x59),
|
||||||
|
AvxOpcode::Vmulsd => (LP::_F2, OM::_0F, 0x59),
|
||||||
|
AvxOpcode::Vsubss => (LP::_F3, OM::_0F, 0x5C),
|
||||||
|
AvxOpcode::Vsubsd => (LP::_F2, OM::_0F, 0x5C),
|
||||||
|
AvxOpcode::Vdivss => (LP::_F3, OM::_0F, 0x5E),
|
||||||
|
AvxOpcode::Vdivsd => (LP::_F2, OM::_0F, 0x5E),
|
||||||
|
AvxOpcode::Vminss => (LP::_F3, OM::_0F, 0x5D),
|
||||||
|
AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D),
|
||||||
|
AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F),
|
||||||
|
AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F),
|
||||||
_ => panic!("unexpected rmir vex opcode {op:?}"),
|
_ => panic!("unexpected rmir vex opcode {op:?}"),
|
||||||
};
|
};
|
||||||
VexInstruction::new()
|
VexInstruction::new()
|
||||||
@@ -2359,6 +2371,72 @@ pub(crate) fn emit(
|
|||||||
.encode(sink);
|
.encode(sink);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmUnaryRmRVex { op, src, dst } => {
    // Emit a VEX-encoded unary XMM instruction: one register-or-memory
    // source, one register destination, 128-bit vector length.
    //
    // The destination allocation is taken first and the source's
    // allocations second; keep this order when editing.
    let dst = allocs.next(dst.to_reg().to_reg());
    let src = match src.clone().to_reg_mem().with_allocs(allocs) {
        RegMem::Reg { reg } => {
            RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
        }
        RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
    };

    // (legacy prefix, opcode map, opcode byte) for every opcode this
    // instruction format supports.
    let (prefix, map, opcode) = match op {
        AvxOpcode::Vpmovsxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x20),
        AvxOpcode::Vpmovzxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x30),
        AvxOpcode::Vpmovsxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x23),
        AvxOpcode::Vpmovzxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x33),
        AvxOpcode::Vpmovsxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x25),
        AvxOpcode::Vpmovzxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x35),
        AvxOpcode::Vpabsb => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1C),
        AvxOpcode::Vpabsw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1D),
        AvxOpcode::Vpabsd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1E),
        AvxOpcode::Vsqrtps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x51),
        AvxOpcode::Vsqrtpd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x51),
        AvxOpcode::Vcvtdq2pd => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0xE6),
        AvxOpcode::Vcvtdq2ps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5B),
        AvxOpcode::Vcvtpd2ps => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x5A),
        AvxOpcode::Vcvtps2pd => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5A),
        AvxOpcode::Vcvttpd2dq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0xE6),
        AvxOpcode::Vcvttps2dq => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x5B),
        // This arm handles the immediate-free format, so the message must
        // not name the `rmr_imm_vex` format handled by a separate arm.
        _ => panic!("unexpected rmr_vex opcode {op:?}"),
    };

    VexInstruction::new()
        .length(VexVectorLength::V128)
        .prefix(prefix)
        .map(map)
        .opcode(opcode)
        .reg(dst.to_real_reg().unwrap().hw_enc())
        .rm(src)
        .encode(sink);
}
|
||||||
|
|
||||||
|
Inst::XmmUnaryRmRImmVex { op, src, dst, imm } => {
    // Emit a VEX-encoded unary XMM instruction that also carries an 8-bit
    // immediate. The destination allocation is taken before the source's.
    let dst = allocs.next(dst.to_reg().to_reg());
    let src = match src.clone().to_reg_mem().with_allocs(allocs) {
        RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
        RegMem::Reg { reg } => {
            RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
        }
    };

    // Every supported opcode uses the 66 prefix and the 0F3A map; only the
    // opcode byte varies.
    let opcode = match op {
        AvxOpcode::Vroundps => 0x08,
        AvxOpcode::Vroundpd => 0x09,
        _ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
    };

    VexInstruction::new()
        .length(VexVectorLength::V128)
        .prefix(LegacyPrefixes::_66)
        .map(OpcodeMap::_0F3A)
        .opcode(opcode)
        .reg(dst.to_real_reg().unwrap().hw_enc())
        .rm(src)
        .imm(*imm)
        .encode(sink);
}
|
||||||
|
|
||||||
Inst::XmmRmREvex {
|
Inst::XmmRmREvex {
|
||||||
op,
|
op,
|
||||||
src1,
|
src1,
|
||||||
|
|||||||
@@ -151,7 +151,9 @@ impl Inst {
|
|||||||
| Inst::XmmRmRVex3 { op, .. }
|
| Inst::XmmRmRVex3 { op, .. }
|
||||||
| Inst::XmmRmRImmVex { op, .. }
|
| Inst::XmmRmRImmVex { op, .. }
|
||||||
| Inst::XmmRmRBlendVex { op, .. }
|
| Inst::XmmRmRBlendVex { op, .. }
|
||||||
| Inst::XmmVexPinsr { op, .. } => op.available_from(),
|
| Inst::XmmVexPinsr { op, .. }
|
||||||
|
| Inst::XmmUnaryRmRVex { op, .. }
|
||||||
|
| Inst::XmmUnaryRmRImmVex { op, .. } => op.available_from(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -910,6 +912,20 @@ impl PrettyPrint for Inst {
|
|||||||
format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst)
|
format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmUnaryRmRVex { op, src, dst, .. } => {
    // Rendered as "<op> <src>, <dst>". The destination is printed first
    // so allocations are consumed in the same order as before.
    let dst_text = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
    let src_text = src.pretty_print(8, allocs);
    let mnemonic = ljustify(op.to_string());
    format!("{mnemonic} {src_text}, {dst_text}")
}
|
||||||
|
|
||||||
|
Inst::XmmUnaryRmRImmVex {
    op, src, dst, imm, ..
} => {
    // Rendered as "<op> $<imm>, <src>, <dst>". The destination is printed
    // first so allocations are consumed in the same order as before.
    let dst_text = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
    let src_text = src.pretty_print(8, allocs);
    let mnemonic = ljustify(op.to_string());
    format!("{mnemonic} ${imm}, {src_text}, {dst_text}")
}
|
||||||
|
|
||||||
Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
|
Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
|
||||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||||
let src = src.pretty_print(8, allocs);
|
let src = src.pretty_print(8, allocs);
|
||||||
@@ -1887,7 +1903,10 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_def(dst.to_writable_reg());
|
||||||
src.get_operands(collector);
|
src.get_operands(collector);
|
||||||
}
|
}
|
||||||
Inst::XmmUnaryRmREvex { src, dst, .. } | Inst::XmmUnaryRmRUnaligned { src, dst, .. } => {
|
Inst::XmmUnaryRmREvex { src, dst, .. }
|
||||||
|
| Inst::XmmUnaryRmRUnaligned { src, dst, .. }
|
||||||
|
| Inst::XmmUnaryRmRVex { src, dst, .. }
|
||||||
|
| Inst::XmmUnaryRmRImmVex { src, dst, .. } => {
|
||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_def(dst.to_writable_reg());
|
||||||
src.get_operands(collector);
|
src.get_operands(collector);
|
||||||
}
|
}
|
||||||
|
|||||||
598
cranelift/filetests/filetests/isa/x64/float-avx.clif
Normal file
598
cranelift/filetests/filetests/isa/x64/float-avx.clif
Normal file
@@ -0,0 +1,598 @@
|
|||||||
|
test compile precise-output
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 has_avx
|
||||||
|
|
||||||
|
function %f32_add(f32, f32) -> f32 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fadd v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vaddss %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vaddss %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f64_add(f64, f64) -> f64 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fadd v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vaddsd %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vaddsd %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f32_sub(f32, f32) -> f32 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fsub v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vsubss %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vsubss %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f64_sub(f64, f64) -> f64 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fsub v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vsubsd %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vsubsd %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f32_mul(f32, f32) -> f32 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fmul v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vmulss %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vmulss %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f64_mul(f64, f64) -> f64 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fmul v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vmulsd %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vmulsd %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f32_div(f32, f32) -> f32 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fdiv v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vdivss %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vdivss %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f64_div(f64, f64) -> f64 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fdiv v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vdivsd %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vdivsd %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f32_min(f32, f32) -> f32 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fmin_pseudo v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vminss %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vminss %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f64_min(f64, f64) -> f64 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fmin_pseudo v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vminsd %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vminsd %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f32_max(f32, f32) -> f32 {
|
||||||
|
block0(v0: f32, v1: f32):
|
||||||
|
v2 = fmax_pseudo v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vmaxss %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vmaxss %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f64_max(f64, f64) -> f64 {
|
||||||
|
block0(v0: f64, v1: f64):
|
||||||
|
v2 = fmax_pseudo v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vmaxsd %xmm1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vmaxsd %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f32x4_sqrt(f32x4) -> f32x4 {
|
||||||
|
block0(v0: f32x4):
|
||||||
|
v1 = sqrt v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vsqrtps %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vsqrtps %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f64x2_sqrt(f64x2) -> f64x2 {
|
||||||
|
block0(v0: f64x2):
|
||||||
|
v1 = sqrt v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vsqrtpd %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vsqrtpd %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f32x4_floor(f32x4) -> f32x4 {
|
||||||
|
block0(v0: f32x4):
|
||||||
|
v1 = floor v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vroundps $1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vroundps $1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %f64x2_floor(f64x2) -> f64x2 {
|
||||||
|
block0(v0: f64x2):
|
||||||
|
v1 = floor v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vroundpd $1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vroundpd $1, %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %fcvt_low_from_sint(i32x4) -> f64x2 {
|
||||||
|
block0(v0: i32x4):
|
||||||
|
v1 = fcvt_low_from_sint.f64x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vcvtdq2pd %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vcvtdq2pd %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %fcvt_from_uint(i32x4) -> f32x4 {
|
||||||
|
block0(v0: i32x4):
|
||||||
|
v1 = fcvt_from_uint.f32x4 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpslld %xmm0, $16, %xmm2
|
||||||
|
; vpsrld %xmm2, $16, %xmm4
|
||||||
|
; vpsubd %xmm0, %xmm4, %xmm6
|
||||||
|
; vcvtdq2ps %xmm4, %xmm8
|
||||||
|
; vpsrld %xmm6, $1, %xmm10
|
||||||
|
; vcvtdq2ps %xmm10, %xmm12
|
||||||
|
; vaddps %xmm12, %xmm12, %xmm14
|
||||||
|
; vaddps %xmm14, %xmm8, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpslld $0x10, %xmm0, %xmm2
|
||||||
|
; vpsrld $0x10, %xmm2, %xmm4
|
||||||
|
; vpsubd %xmm4, %xmm0, %xmm6
|
||||||
|
; vcvtdq2ps %xmm4, %xmm8
|
||||||
|
; vpsrld $1, %xmm6, %xmm10
|
||||||
|
; vcvtdq2ps %xmm10, %xmm12
|
||||||
|
; vaddps %xmm12, %xmm12, %xmm14
|
||||||
|
; vaddps %xmm8, %xmm14, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %fvdemote(f64x2) -> f32x4 {
|
||||||
|
block0(v0: f64x2):
|
||||||
|
v1 = fvdemote v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vcvtpd2ps %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vcvtpd2ps %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %fvpromote_low(f32x4) -> f64x2 {
|
||||||
|
block0(v0: f32x4):
|
||||||
|
v1 = fvpromote_low v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vcvtps2pd %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vcvtps2pd %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %fcvt_to_sint_sat(f32x4) -> i32x4 {
|
||||||
|
block0(v0: f32x4):
|
||||||
|
v1 = fcvt_to_sint_sat.i32x4 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vcmpps $0 %xmm0, %xmm0, %xmm2
|
||||||
|
; vandps %xmm0, %xmm2, %xmm4
|
||||||
|
; vpxor %xmm2, %xmm4, %xmm6
|
||||||
|
; vcvttps2dq %xmm4, %xmm8
|
||||||
|
; vpand %xmm8, %xmm6, %xmm10
|
||||||
|
; vpsrad %xmm10, $31, %xmm12
|
||||||
|
; vpxor %xmm12, %xmm8, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vcmpeqps %xmm0, %xmm0, %xmm2
|
||||||
|
; vandps %xmm2, %xmm0, %xmm4
|
||||||
|
; vpxor %xmm4, %xmm2, %xmm6
|
||||||
|
; vcvttps2dq %xmm4, %xmm8
|
||||||
|
; vpand %xmm6, %xmm8, %xmm10
|
||||||
|
; vpsrad $0x1f, %xmm10, %xmm12
|
||||||
|
; vpxor %xmm8, %xmm12, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %fcvt_to_sint_sat_snarrow(f64x2) -> i32x4 {
|
||||||
|
block0(v0: f64x2):
|
||||||
|
v1 = fcvt_to_sint_sat.i64x2 v0
|
||||||
|
v2 = vconst.i64x2 0x00
|
||||||
|
v3 = snarrow v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vcmppd $0 %xmm0, %xmm0, %xmm2
|
||||||
|
; movupd const(0), %xmm4
|
||||||
|
; vandps %xmm2, %xmm4, %xmm6
|
||||||
|
; vminpd %xmm0, %xmm6, %xmm8
|
||||||
|
; vcvttpd2dq %xmm8, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vcmpeqpd %xmm0, %xmm0, %xmm2
|
||||||
|
; movupd 0x1f(%rip), %xmm4
|
||||||
|
; vandps %xmm4, %xmm2, %xmm6
|
||||||
|
; vminpd %xmm6, %xmm0, %xmm8
|
||||||
|
; vcvttpd2dq %xmm8, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
; addb %al, (%rax)
|
||||||
|
; addb %al, (%rax)
|
||||||
|
; addb %al, (%rax)
|
||||||
|
; addb %al, (%rax)
|
||||||
|
; addb %al, (%rax)
|
||||||
|
; addb %al, (%rax)
|
||||||
|
; addb %al, (%rax)
|
||||||
|
; addb %al, %al
|
||||||
|
|
||||||
@@ -1213,7 +1213,7 @@ block0(v0: i8x16):
|
|||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; vpalignr $8 %xmm0, %xmm0, %xmm2
|
; vpalignr $8 %xmm0, %xmm0, %xmm2
|
||||||
; pmovzxbw %xmm2, %xmm0
|
; vpmovzxbw %xmm2, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -1224,7 +1224,7 @@ block0(v0: i8x16):
|
|||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block1: ; offset 0x4
|
; block1: ; offset 0x4
|
||||||
; vpalignr $8, %xmm0, %xmm0, %xmm2
|
; vpalignr $8, %xmm0, %xmm0, %xmm2
|
||||||
; pmovzxbw %xmm2, %xmm0
|
; vpmovzxbw %xmm2, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; retq
|
; retq
|
||||||
@@ -1359,7 +1359,7 @@ block0(v0: f64x2):
|
|||||||
; vmaxpd %xmm0, %xmm2, %xmm4
|
; vmaxpd %xmm0, %xmm2, %xmm4
|
||||||
; movupd const(0), %xmm6
|
; movupd const(0), %xmm6
|
||||||
; vminpd %xmm4, %xmm6, %xmm8
|
; vminpd %xmm4, %xmm6, %xmm8
|
||||||
; roundpd $3, %xmm8, %xmm10
|
; vroundpd $3, %xmm8, %xmm10
|
||||||
; movupd const(1), %xmm12
|
; movupd const(1), %xmm12
|
||||||
; vaddpd %xmm10, %xmm12, %xmm14
|
; vaddpd %xmm10, %xmm12, %xmm14
|
||||||
; vshufps $136 %xmm14, %xmm2, %xmm0
|
; vshufps $136 %xmm14, %xmm2, %xmm0
|
||||||
@@ -1376,8 +1376,8 @@ block0(v0: f64x2):
|
|||||||
; vmaxpd %xmm2, %xmm0, %xmm4
|
; vmaxpd %xmm2, %xmm0, %xmm4
|
||||||
; movupd 0x2c(%rip), %xmm6
|
; movupd 0x2c(%rip), %xmm6
|
||||||
; vminpd %xmm6, %xmm4, %xmm8
|
; vminpd %xmm6, %xmm4, %xmm8
|
||||||
; roundpd $3, %xmm8, %xmm10
|
; vroundpd $3, %xmm8, %xmm10
|
||||||
; movupd 0x28(%rip), %xmm12
|
; movupd 0x29(%rip), %xmm12
|
||||||
; vaddpd %xmm12, %xmm10, %xmm14
|
; vaddpd %xmm12, %xmm10, %xmm14
|
||||||
; vshufps $0x88, %xmm2, %xmm14, %xmm0
|
; vshufps $0x88, %xmm2, %xmm14, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
@@ -1388,7 +1388,8 @@ block0(v0: f64x2):
|
|||||||
; addb %al, (%rax)
|
; addb %al, (%rax)
|
||||||
; addb %al, (%rax)
|
; addb %al, (%rax)
|
||||||
; addb %al, (%rax)
|
; addb %al, (%rax)
|
||||||
; addb %ah, %al
|
; addb %al, (%rax)
|
||||||
|
; loopne 0x43
|
||||||
|
|
||||||
function %i8x16_shl(i8x16, i32) -> i8x16 {
|
function %i8x16_shl(i8x16, i32) -> i8x16 {
|
||||||
block0(v0: i8x16, v1: i32):
|
block0(v0: i8x16, v1: i32):
|
||||||
@@ -1884,3 +1885,78 @@ block0(v0: i64x2):
|
|||||||
; popq %rbp
|
; popq %rbp
|
||||||
; retq
|
; retq
|
||||||
|
|
||||||
|
function %i8x16_abs(i8x16) -> i8x16 {
|
||||||
|
block0(v0: i8x16):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpabsb %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpabsb %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %i16x8_abs(i16x8) -> i16x8 {
|
||||||
|
block0(v0: i16x8):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpabsw %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpabsw %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %i32x4_abs(i32x4) -> i32x4 {
|
||||||
|
block0(v0: i32x4):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpabsd %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpabsd %xmm0, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
|||||||
154
cranelift/filetests/filetests/isa/x64/simd-load-avx.clif
Normal file
154
cranelift/filetests/filetests/isa/x64/simd-load-avx.clif
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
test compile precise-output
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 has_avx
|
||||||
|
|
||||||
|
function %sload8x8(i64) -> i16x8 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = sload8x8 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpmovsxbw 0(%rdi), %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpmovsxbw (%rdi), %xmm0 ; trap: heap_oob
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %uload8x8(i64) -> i16x8 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = uload8x8 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpmovzxbw 0(%rdi), %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpmovzxbw (%rdi), %xmm0 ; trap: heap_oob
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %sload16x4(i64) -> i32x4 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = sload16x4 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpmovsxwd 0(%rdi), %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpmovsxwd (%rdi), %xmm0 ; trap: heap_oob
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %uload16x4(i64) -> i32x4 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = uload16x4 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpmovzxwd 0(%rdi), %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpmovzxwd (%rdi), %xmm0 ; trap: heap_oob
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %sload32x2(i64) -> i64x2 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = sload32x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpmovsxdq 0(%rdi), %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpmovsxdq (%rdi), %xmm0 ; trap: heap_oob
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %uload32x2(i64) -> i64x2 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = uload32x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; vpmovzxdq 0(%rdi), %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; vpmovzxdq (%rdi), %xmm0 ; trap: heap_oob
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
Reference in New Issue
Block a user