x64: Fill out more AVX instructions (#5849)

* x64: Fill out more AVX instructions

This commit fills out AVX equivalents for SSE instructions that are
currently in use. Many of these instructions do not benefit from the
3-operand form that AVX provides, but they do benefit from taking an
`XmmMem` operand instead of `XmmMemAligned`, which can avoid extra
temporary registers in some cases.
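
As a concrete illustration of the `XmmMem` benefit (a hypothetical lowering
sketch, not output from this commit): SSE packed ops require their memory
operand to be 16-byte aligned, so an unaligned source must first be loaded
into a temporary, while the VEX forms accept an unaligned memory operand
directly and are non-destructive:

  ; SSE: memory operand must be aligned, so load through a temporary
  movdqu 0(%rdi), %xmm1
  addps %xmm1, %xmm0
  ; AVX: the unaligned memory operand folds straight into the op
  vaddps 0(%rdi), %xmm0, %xmm0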

* Review comments
Alex Crichton
2023-02-23 16:31:31 -06:00
committed by GitHub
parent 8abfe928d6
commit 3fc3bc9ec8
7 changed files with 1114 additions and 13 deletions

View File

@@ -283,6 +283,17 @@
(mask Xmm)
(dst WritableXmm))
;; XMM unary op using a VEX encoding (aka AVX).
(XmmUnaryRmRVex (op AvxOpcode)
(src XmmMem)
(dst WritableXmm))
;; XMM unary op using a VEX encoding (aka AVX) with an immediate.
(XmmUnaryRmRImmVex (op AvxOpcode)
(src XmmMem)
(dst WritableXmm)
(imm u8))
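;; (On the Rust side these ISLE variants become variants of the generated
;; `Inst` enum, which the emission and pretty-printing code later in this
;; commit matches on; roughly, eliding the rest of the enum:
;;   XmmUnaryRmRVex { op: AvxOpcode, src: XmmMem, dst: WritableXmm }
;;   XmmUnaryRmRImmVex { op: AvxOpcode, src: XmmMem, dst: WritableXmm, imm: u8 })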
;; XMM (scalar or vector) binary op that relies on the EVEX
;; prefix. Takes two inputs.
(XmmRmREvex (op Avx512Opcode)
@@ -1314,6 +1325,37 @@
Vpsllq
Vpsraw
Vpsrad
Vpmovsxbw
Vpmovzxbw
Vpmovsxwd
Vpmovzxwd
Vpmovsxdq
Vpmovzxdq
Vaddss
Vaddsd
Vmulss
Vmulsd
Vsubss
Vsubsd
Vdivss
Vdivsd
Vpabsb
Vpabsw
Vpabsd
Vminss
Vminsd
Vmaxss
Vmaxsd
Vsqrtps
Vsqrtpd
Vroundps
Vroundpd
Vcvtdq2pd
Vcvtdq2ps
Vcvtpd2ps
Vcvtps2pd
Vcvttpd2dq
Vcvttps2dq
))
(type Avx512Opcode extern
@@ -1902,33 +1944,47 @@
(rule (x64_movdqu from)
(xmm_unary_rm_r_unaligned (SseOpcode.Movdqu) from))
(decl x64_movapd (XmmMem) Xmm)
(rule (x64_movapd src)
(xmm_unary_rm_r (SseOpcode.Movapd) src))
(decl x64_pmovsxbw (XmmMem) Xmm)
(rule (x64_pmovsxbw from)
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxbw) from))
(rule 1 (x64_pmovsxbw from)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxbw) from))
(decl x64_pmovzxbw (XmmMem) Xmm)
(rule (x64_pmovzxbw from)
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxbw) from))
(rule 1 (x64_pmovzxbw from)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxbw) from))
(decl x64_pmovsxwd (XmmMem) Xmm)
(rule (x64_pmovsxwd from)
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxwd) from))
(rule 1 (x64_pmovsxwd from)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxwd) from))
(decl x64_pmovzxwd (XmmMem) Xmm)
(rule (x64_pmovzxwd from)
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxwd) from))
(rule 1 (x64_pmovzxwd from)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxwd) from))
(decl x64_pmovsxdq (XmmMem) Xmm)
(rule (x64_pmovsxdq from)
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxdq) from))
(rule 1 (x64_pmovsxdq from)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxdq) from))
(decl x64_pmovzxdq (XmmMem) Xmm)
(rule (x64_pmovzxdq from)
(xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxdq) from))
(rule 1 (x64_pmovzxdq from)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxdq) from))
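;; Note the pattern shared by the helpers above and below: the unnumbered
;; rule (default priority 0) selects the SSE encoding, while the
;; higher-priority `rule 1` takes precedence and selects the VEX encoding
;; whenever its `(if-let $true (has_avx))` guard matches.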
(decl x64_movrm (Type SyntheticAmode Gpr) SideEffectNoResult)
(rule (x64_movrm ty addr data)
@@ -2702,11 +2758,17 @@
(decl x64_addss (Xmm XmmMem) Xmm)
(rule (x64_addss src1 src2)
(xmm_rm_r_unaligned (SseOpcode.Addss) src1 src2))
(rule 1 (x64_addss src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vaddss) src1 src2))
;; Helper for creating `addsd` instructions.
(decl x64_addsd (Xmm XmmMem) Xmm)
(rule (x64_addsd src1 src2)
(xmm_rm_r_unaligned (SseOpcode.Addsd) src1 src2))
(rule 1 (x64_addsd src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vaddsd) src1 src2))
;; Helper for creating `addps` instructions.
(decl x64_addps (Xmm XmmMem) Xmm)
@@ -2728,11 +2790,17 @@
(decl x64_subss (Xmm XmmMem) Xmm)
(rule (x64_subss src1 src2)
(xmm_rm_r_unaligned (SseOpcode.Subss) src1 src2))
(rule 1 (x64_subss src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vsubss) src1 src2))
;; Helper for creating `subsd` instructions.
(decl x64_subsd (Xmm XmmMem) Xmm)
(rule (x64_subsd src1 src2)
(xmm_rm_r_unaligned (SseOpcode.Subsd) src1 src2))
(rule 1 (x64_subsd src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vsubsd) src1 src2))
;; Helper for creating `subps` instructions.
(decl x64_subps (Xmm XmmMem) Xmm)
@@ -2754,11 +2822,17 @@
(decl x64_mulss (Xmm XmmMem) Xmm)
(rule (x64_mulss src1 src2)
(xmm_rm_r_unaligned (SseOpcode.Mulss) src1 src2))
(rule 1 (x64_mulss src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vmulss) src1 src2))
;; Helper for creating `mulsd` instructions.
(decl x64_mulsd (Xmm XmmMem) Xmm)
(rule (x64_mulsd src1 src2)
(xmm_rm_r_unaligned (SseOpcode.Mulsd) src1 src2))
(rule 1 (x64_mulsd src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vmulsd) src1 src2))
;; Helper for creating `mulps` instructions.
(decl x64_mulps (Xmm XmmMem) Xmm)
@@ -2780,11 +2854,17 @@
(decl x64_divss (Xmm XmmMem) Xmm)
(rule (x64_divss src1 src2)
(xmm_rm_r_unaligned (SseOpcode.Divss) src1 src2))
(rule 1 (x64_divss src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vdivss) src1 src2))
;; Helper for creating `divsd` instructions.
(decl x64_divsd (Xmm XmmMem) Xmm)
(rule (x64_divsd src1 src2)
(xmm_rm_r_unaligned (SseOpcode.Divsd) src1 src2))
(rule 1 (x64_divsd src1 src2)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vdivsd) src1 src2))
;; Helper for creating `divps` instructions.
(decl x64_divps (Xmm XmmMem) Xmm)
@@ -2816,6 +2896,20 @@
(_ Unit (emit (MInst.XmmRmRBlendVex op src1 src2 mask dst))))
dst))
;; Helper for creating `XmmUnaryRmRVex` instructions
(decl xmm_unary_rm_r_vex (AvxOpcode XmmMem) Xmm)
(rule (xmm_unary_rm_r_vex op src)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (MInst.XmmUnaryRmRVex op src dst))))
dst))
;; Helper for creating `XmmUnaryRmRImmVex` instructions
(decl xmm_unary_rm_r_imm_vex (AvxOpcode XmmMem u8) Xmm)
(rule (xmm_unary_rm_r_imm_vex op src imm)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (MInst.XmmUnaryRmRImmVex op src dst imm))))
dst))
;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions.
(decl x64_blend (Type Xmm XmmMem Xmm) Xmm)
(rule 1 (x64_blend $F32X4 mask src1 src2) (x64_blendvps src2 src1 mask))
@@ -3131,11 +3225,17 @@
(decl x64_roundps (XmmMem RoundImm) Xmm)
(rule (x64_roundps src1 round)
(xmm_unary_rm_r_imm (SseOpcode.Roundps) src1 (encode_round_imm round)))
(rule 1 (x64_roundps src1 round)
(if-let $true (has_avx))
(xmm_unary_rm_r_imm_vex (AvxOpcode.Vroundps) src1 (encode_round_imm round)))
;; Helper for creating `roundpd` instructions.
(decl x64_roundpd (XmmMem RoundImm) Xmm)
(rule (x64_roundpd src1 round)
(xmm_unary_rm_r_imm (SseOpcode.Roundpd) src1 (encode_round_imm round)))
(rule 1 (x64_roundpd src1 round)
(if-let $true (has_avx))
(xmm_unary_rm_r_imm_vex (AvxOpcode.Vroundpd) src1 (encode_round_imm round)))
;; Helper for creating `pmaddwd` instructions.
(decl x64_pmaddwd (Xmm XmmMem) Xmm)
@@ -3207,16 +3307,25 @@
(decl x64_pabsb (XmmMem) Xmm)
(rule (x64_pabsb src)
(xmm_unary_rm_r (SseOpcode.Pabsb) src))
(rule 1 (x64_pabsb src)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpabsb) src))
;; Helper for creating `pabsw` instructions.
(decl x64_pabsw (XmmMem) Xmm)
(rule (x64_pabsw src)
(xmm_unary_rm_r (SseOpcode.Pabsw) src))
(rule 1 (x64_pabsw src)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpabsw) src))
;; Helper for creating `pabsd` instructions.
(decl x64_pabsd (XmmMem) Xmm)
(rule (x64_pabsd src)
(xmm_unary_rm_r (SseOpcode.Pabsd) src))
(rule 1 (x64_pabsd src)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vpabsd) src))
;; Helper for creating `MInst.XmmUnaryRmREvex` instructions.
(decl xmm_unary_rm_r_evex (Avx512Opcode XmmMem) Xmm)
@@ -3540,11 +3649,17 @@
(decl x64_minss (Xmm XmmMem) Xmm)
(rule (x64_minss x y)
(xmm_rm_r_unaligned (SseOpcode.Minss) x y))
(rule 1 (x64_minss x y)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vminss) x y))
;; Helper for creating `minsd` instructions.
(decl x64_minsd (Xmm XmmMem) Xmm)
(rule (x64_minsd x y)
(xmm_rm_r_unaligned (SseOpcode.Minsd) x y))
(rule 1 (x64_minsd x y)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vminsd) x y))
;; Helper for creating `minps` instructions.
(decl x64_minps (Xmm XmmMem) Xmm)
@@ -3566,11 +3681,17 @@
(decl x64_maxss (Xmm XmmMem) Xmm)
(rule (x64_maxss x y)
(xmm_rm_r_unaligned (SseOpcode.Maxss) x y))
(rule 1 (x64_maxss x y)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vmaxss) x y))
;; Helper for creating `maxsd` instructions.
(decl x64_maxsd (Xmm XmmMem) Xmm)
(rule (x64_maxsd x y)
(xmm_rm_r_unaligned (SseOpcode.Maxsd) x y))
(rule 1 (x64_maxsd x y)
(if-let $true (has_avx))
(xmm_rmir_vex (AvxOpcode.Vmaxsd) x y))
;; Helper for creating `maxps` instructions.
(decl x64_maxps (Xmm XmmMem) Xmm)
@@ -3649,10 +3770,16 @@
;; Helper for creating `sqrtps` instructions.
(decl x64_sqrtps (XmmMem) Xmm)
(rule (x64_sqrtps x) (xmm_unary_rm_r (SseOpcode.Sqrtps) x))
(rule 1 (x64_sqrtps x)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vsqrtps) x))
;; Helper for creating `sqrtpd` instructions.
(decl x64_sqrtpd (XmmMem) Xmm)
(rule (x64_sqrtpd x) (xmm_unary_rm_r (SseOpcode.Sqrtpd) x))
(rule 1 (x64_sqrtpd x)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vsqrtpd) x))
;; Helper for creating `cvtss2sd` instructions.
(decl x64_cvtss2sd (Xmm) Xmm)
@@ -3665,18 +3792,30 @@
;; Helper for creating `cvtdq2ps` instructions.
(decl x64_cvtdq2ps (XmmMem) Xmm)
(rule (x64_cvtdq2ps x) (xmm_unary_rm_r (SseOpcode.Cvtdq2ps) x))
(rule 1 (x64_cvtdq2ps x)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2ps) x))
;; Helper for creating `cvtps2pd` instructions.
(decl x64_cvtps2pd (XmmMem) Xmm)
(rule (x64_cvtps2pd x) (xmm_unary_rm_r (SseOpcode.Cvtps2pd) x))
(rule 1 (x64_cvtps2pd x)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtps2pd) x))
;; Helper for creating `cvtpd2ps` instructions.
(decl x64_cvtpd2ps (XmmMem) Xmm)
(rule (x64_cvtpd2ps x) (xmm_unary_rm_r (SseOpcode.Cvtpd2ps) x))
(rule 1 (x64_cvtpd2ps x)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtpd2ps) x))
;; Helper for creating `cvtdq2pd` instructions.
(decl x64_cvtdq2pd (XmmMem) Xmm)
(rule (x64_cvtdq2pd x) (xmm_unary_rm_r (SseOpcode.Cvtdq2pd) x))
(rule 1 (x64_cvtdq2pd x)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2pd) x))
;; Helper for creating `cvtsi2ss` instructions.
(decl x64_cvtsi2ss (Type GprMem) Xmm)
@@ -3692,11 +3831,17 @@
(decl x64_cvttps2dq (XmmMem) Xmm)
(rule (x64_cvttps2dq x)
(xmm_unary_rm_r (SseOpcode.Cvttps2dq) x))
(rule 1 (x64_cvttps2dq x)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vcvttps2dq) x))
;; Helper for creating `cvttpd2dq` instructions.
(decl x64_cvttpd2dq (XmmMem) Xmm)
(rule (x64_cvttpd2dq x)
(xmm_unary_rm_r (SseOpcode.Cvttpd2dq) x))
(rule 1 (x64_cvttpd2dq x)
(if-let $true (has_avx))
(xmm_unary_rm_r_vex (AvxOpcode.Vcvttpd2dq) x))
(decl cvt_u64_to_float_seq (Type Gpr) Xmm)
(rule (cvt_u64_to_float_seq ty src)

View File

@@ -1630,7 +1630,38 @@ impl AvxOpcode {
| AvxOpcode::Vpslld
| AvxOpcode::Vpsllq
| AvxOpcode::Vpsraw
- | AvxOpcode::Vpsrad => {
+ | AvxOpcode::Vpsrad
| AvxOpcode::Vpmovsxbw
| AvxOpcode::Vpmovzxbw
| AvxOpcode::Vpmovsxwd
| AvxOpcode::Vpmovzxwd
| AvxOpcode::Vpmovsxdq
| AvxOpcode::Vpmovzxdq
| AvxOpcode::Vaddss
| AvxOpcode::Vaddsd
| AvxOpcode::Vmulss
| AvxOpcode::Vmulsd
| AvxOpcode::Vsubss
| AvxOpcode::Vsubsd
| AvxOpcode::Vdivss
| AvxOpcode::Vdivsd
| AvxOpcode::Vpabsb
| AvxOpcode::Vpabsw
| AvxOpcode::Vpabsd
| AvxOpcode::Vminss
| AvxOpcode::Vminsd
| AvxOpcode::Vmaxss
| AvxOpcode::Vmaxsd
| AvxOpcode::Vsqrtps
| AvxOpcode::Vsqrtpd
| AvxOpcode::Vroundpd
| AvxOpcode::Vroundps
| AvxOpcode::Vcvtdq2pd
| AvxOpcode::Vcvtdq2ps
| AvxOpcode::Vcvtpd2ps
| AvxOpcode::Vcvtps2pd
| AvxOpcode::Vcvttpd2dq
| AvxOpcode::Vcvttps2dq => {
smallvec![InstructionSet::AVX]
}
}

View File

@@ -2182,6 +2182,18 @@ pub(crate) fn emit(
AvxOpcode::Vpsllq => (LP::_66, OM::_0F, 0xF3),
AvxOpcode::Vpsraw => (LP::_66, OM::_0F, 0xE1),
AvxOpcode::Vpsrad => (LP::_66, OM::_0F, 0xE2),
AvxOpcode::Vaddss => (LP::_F3, OM::_0F, 0x58),
AvxOpcode::Vaddsd => (LP::_F2, OM::_0F, 0x58),
AvxOpcode::Vmulss => (LP::_F3, OM::_0F, 0x59),
AvxOpcode::Vmulsd => (LP::_F2, OM::_0F, 0x59),
AvxOpcode::Vsubss => (LP::_F3, OM::_0F, 0x5C),
AvxOpcode::Vsubsd => (LP::_F2, OM::_0F, 0x5C),
AvxOpcode::Vdivss => (LP::_F3, OM::_0F, 0x5E),
AvxOpcode::Vdivsd => (LP::_F2, OM::_0F, 0x5E),
AvxOpcode::Vminss => (LP::_F3, OM::_0F, 0x5D),
AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D),
AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F),
AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F),
_ => panic!("unexpected rmir vex opcode {op:?}"),
};
VexInstruction::new()
@@ -2359,6 +2371,72 @@ pub(crate) fn emit(
.encode(sink);
}
Inst::XmmUnaryRmRVex { op, src, dst } => {
let dst = allocs.next(dst.to_reg().to_reg());
let src = match src.clone().to_reg_mem().with_allocs(allocs) {
RegMem::Reg { reg } => {
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
}
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
};
let (prefix, map, opcode) = match op {
AvxOpcode::Vpmovsxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x20),
AvxOpcode::Vpmovzxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x30),
AvxOpcode::Vpmovsxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x23),
AvxOpcode::Vpmovzxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x33),
AvxOpcode::Vpmovsxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x25),
AvxOpcode::Vpmovzxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x35),
AvxOpcode::Vpabsb => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1C),
AvxOpcode::Vpabsw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1D),
AvxOpcode::Vpabsd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1E),
AvxOpcode::Vsqrtps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x51),
AvxOpcode::Vsqrtpd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x51),
AvxOpcode::Vcvtdq2pd => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0xE6),
AvxOpcode::Vcvtdq2ps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5B),
AvxOpcode::Vcvtpd2ps => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x5A),
AvxOpcode::Vcvtps2pd => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5A),
AvxOpcode::Vcvttpd2dq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0xE6),
AvxOpcode::Vcvttps2dq => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x5B),
_ => panic!("unexpected unary vex opcode {op:?}"),
};
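// Unlike the binary ops above, these have only a single source, so no
// register is assigned to the VEX `vvvv` field; the encoder's default
// (assumed here) is the all-ones "no register" value the encoding
// requires, with `reg` carrying the destination and `rm` the one source.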
VexInstruction::new()
.length(VexVectorLength::V128)
.prefix(prefix)
.map(map)
.opcode(opcode)
.reg(dst.to_real_reg().unwrap().hw_enc())
.rm(src)
.encode(sink);
}
Inst::XmmUnaryRmRImmVex { op, src, dst, imm } => {
let dst = allocs.next(dst.to_reg().to_reg());
let src = match src.clone().to_reg_mem().with_allocs(allocs) {
RegMem::Reg { reg } => {
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
}
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
};
let (prefix, map, opcode) = match op {
AvxOpcode::Vroundps => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x08),
AvxOpcode::Vroundpd => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x09),
_ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
};
VexInstruction::new()
.length(VexVectorLength::V128)
.prefix(prefix)
.map(map)
.opcode(opcode)
.reg(dst.to_real_reg().unwrap().hw_enc())
.rm(src)
.imm(*imm)
.encode(sink);
}
Inst::XmmRmREvex {
op,
src1,

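The `(prefix, map, opcode)` triples in the match arms above are exactly the
fields that `VexInstruction` folds into the VEX prefix bytes. As a rough
sketch of the underlying format (a simplified, hypothetical helper, not
Cranelift's actual encoder):

    /// Build the general three-byte VEX prefix (the 0xC4 form).
    /// `map` selects the opcode map (0b00001 = 0F, 0b00010 = 0F38,
    /// 0b00011 = 0F3A) and `pp` the implied legacy prefix (0b00 = none,
    /// 0b01 = 66, 0b10 = F3, 0b11 = F2); `vvvv` names the extra source
    /// register and encodes as all-ones when unused.
    fn vex3_prefix(r: bool, x: bool, b: bool, w: bool, map: u8, vvvv: u8, l: bool, pp: u8) -> [u8; 3] {
        let byte1 = ((!r as u8) << 7) | ((!x as u8) << 6) | ((!b as u8) << 5) | (map & 0b11111);
        let byte2 = ((w as u8) << 7) | ((!vvvv & 0b1111) << 3) | ((l as u8) << 2) | (pp & 0b11);
        [0xC4, byte1, byte2]
    }

Under that reading, `Vaddss => (LP::_F3, OM::_0F, 0x58)` above becomes
pp = 0b10 and map = 0b00001, with L = 0 since everything here uses the
128-bit vector length.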
View File

@@ -151,7 +151,9 @@ impl Inst {
| Inst::XmmRmRVex3 { op, .. }
| Inst::XmmRmRImmVex { op, .. }
| Inst::XmmRmRBlendVex { op, .. }
- | Inst::XmmVexPinsr { op, .. } => op.available_from(),
+ | Inst::XmmVexPinsr { op, .. }
| Inst::XmmUnaryRmRVex { op, .. }
| Inst::XmmUnaryRmRImmVex { op, .. } => op.available_from(),
}
}
}
@@ -910,6 +912,20 @@ impl PrettyPrint for Inst {
format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst)
}
Inst::XmmUnaryRmRVex { op, src, dst, .. } => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
}
Inst::XmmUnaryRmRImmVex {
op, src, dst, imm, ..
} => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
format!("{} ${imm}, {}, {}", ljustify(op.to_string()), src, dst)
}
Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
@@ -1887,7 +1903,10 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}
- Inst::XmmUnaryRmREvex { src, dst, .. } | Inst::XmmUnaryRmRUnaligned { src, dst, .. } => {
+ Inst::XmmUnaryRmREvex { src, dst, .. }
| Inst::XmmUnaryRmRUnaligned { src, dst, .. }
| Inst::XmmUnaryRmRVex { src, dst, .. }
| Inst::XmmUnaryRmRImmVex { src, dst, .. } => {
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}

View File

@@ -0,0 +1,598 @@
test compile precise-output
set enable_simd
target x86_64 has_avx
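; Note on reading the expectations below: the "VCode" block is the backend's
; own pretty-printed listing, while the "Disassembled" block is a decoding of
; the actual emitted bytes, so the two may print the same instruction with
; its operands in a different order (e.g. `vaddss %xmm0, %xmm1, %xmm0`
; versus `vaddss %xmm1, %xmm0, %xmm0`).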
function %f32_add(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fadd v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vaddss %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vaddss %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64_add(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fadd v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vaddsd %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vaddsd %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32_sub(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fsub v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vsubss %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vsubss %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64_sub(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fsub v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vsubsd %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vsubsd %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32_mul(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmul v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmulss %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmulss %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64_mul(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmul v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmulsd %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmulsd %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32_div(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fdiv v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vdivss %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vdivss %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64_div(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fdiv v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vdivsd %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vdivsd %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32_min(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmin_pseudo v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vminss %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vminss %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64_min(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmin_pseudo v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vminsd %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vminsd %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32_max(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fmax_pseudo v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmaxss %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmaxss %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64_max(f64, f64) -> f64 {
block0(v0: f64, v1: f64):
v2 = fmax_pseudo v0, v1
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmaxsd %xmm1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmaxsd %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32x4_sqrt(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = sqrt v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vsqrtps %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vsqrtps %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64x2_sqrt(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = sqrt v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vsqrtpd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vsqrtpd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32x4_floor(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = floor v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vroundps $1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vroundps $1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64x2_floor(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = floor v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vroundpd $1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vroundpd $1, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %fcvt_low_from_sint(i32x4) -> f64x2 {
block0(v0: i32x4):
v1 = fcvt_low_from_sint.f64x2 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtdq2pd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtdq2pd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpslld %xmm0, $16, %xmm2
; vpsrld %xmm2, $16, %xmm4
; vpsubd %xmm0, %xmm4, %xmm6
; vcvtdq2ps %xmm4, %xmm8
; vpsrld %xmm6, $1, %xmm10
; vcvtdq2ps %xmm10, %xmm12
; vaddps %xmm12, %xmm12, %xmm14
; vaddps %xmm14, %xmm8, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpslld $0x10, %xmm0, %xmm2
; vpsrld $0x10, %xmm2, %xmm4
; vpsubd %xmm4, %xmm0, %xmm6
; vcvtdq2ps %xmm4, %xmm8
; vpsrld $1, %xmm6, %xmm10
; vcvtdq2ps %xmm10, %xmm12
; vaddps %xmm12, %xmm12, %xmm14
; vaddps %xmm8, %xmm14, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %fvdemote(f64x2) -> f32x4 {
block0(v0: f64x2):
v1 = fvdemote v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtpd2ps %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtpd2ps %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %fvpromote_low(f32x4) -> f64x2 {
block0(v0: f32x4):
v1 = fvpromote_low v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtps2pd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtps2pd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %fcvt_to_sint_sat(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = fcvt_to_sint_sat.i32x4 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcmpps $0 %xmm0, %xmm0, %xmm2
; vandps %xmm0, %xmm2, %xmm4
; vpxor %xmm2, %xmm4, %xmm6
; vcvttps2dq %xmm4, %xmm8
; vpand %xmm8, %xmm6, %xmm10
; vpsrad %xmm10, $31, %xmm12
; vpxor %xmm12, %xmm8, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcmpeqps %xmm0, %xmm0, %xmm2
; vandps %xmm2, %xmm0, %xmm4
; vpxor %xmm4, %xmm2, %xmm6
; vcvttps2dq %xmm4, %xmm8
; vpand %xmm6, %xmm8, %xmm10
; vpsrad $0x1f, %xmm10, %xmm12
; vpxor %xmm8, %xmm12, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %fcvt_to_sint_sat_snarrow(f64x2) -> i32x4 {
block0(v0: f64x2):
v1 = fcvt_to_sint_sat.i64x2 v0
v2 = vconst.i64x2 0x00
v3 = snarrow v1, v2
return v3
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcmppd $0 %xmm0, %xmm0, %xmm2
; movupd const(0), %xmm4
; vandps %xmm2, %xmm4, %xmm6
; vminpd %xmm0, %xmm6, %xmm8
; vcvttpd2dq %xmm8, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcmpeqpd %xmm0, %xmm0, %xmm2
; movupd 0x1f(%rip), %xmm4
; vandps %xmm4, %xmm2, %xmm6
; vminpd %xmm6, %xmm0, %xmm8
; vcvttpd2dq %xmm8, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, %al
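; (The trailing `addb` lines above are the disassembler decoding the
; constant-pool data emitted after the last function, not actual code.)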

View File

@@ -1213,7 +1213,7 @@ block0(v0: i8x16):
; movq %rsp, %rbp
; block0:
; vpalignr $8 %xmm0, %xmm0, %xmm2
- ; pmovzxbw %xmm2, %xmm0
+ ; vpmovzxbw %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -1224,7 +1224,7 @@ block0(v0: i8x16):
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpalignr $8, %xmm0, %xmm0, %xmm2
- ; pmovzxbw %xmm2, %xmm0
+ ; vpmovzxbw %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
@@ -1359,7 +1359,7 @@ block0(v0: f64x2):
; vmaxpd %xmm0, %xmm2, %xmm4
; movupd const(0), %xmm6
; vminpd %xmm4, %xmm6, %xmm8
- ; roundpd $3, %xmm8, %xmm10
+ ; vroundpd $3, %xmm8, %xmm10
; movupd const(1), %xmm12
; vaddpd %xmm10, %xmm12, %xmm14
; vshufps $136 %xmm14, %xmm2, %xmm0
@@ -1376,8 +1376,8 @@ block0(v0: f64x2):
; vmaxpd %xmm2, %xmm0, %xmm4
; movupd 0x2c(%rip), %xmm6
; vminpd %xmm6, %xmm4, %xmm8
- ; roundpd $3, %xmm8, %xmm10
+ ; vroundpd $3, %xmm8, %xmm10
- ; movupd 0x28(%rip), %xmm12
+ ; movupd 0x29(%rip), %xmm12
; vaddpd %xmm12, %xmm10, %xmm14
; vshufps $0x88, %xmm2, %xmm14, %xmm0
; movq %rbp, %rsp
@@ -1388,7 +1388,8 @@ block0(v0: f64x2):
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
- ; addb %ah, %al
+ ; addb %al, (%rax)
; loopne 0x43
function %i8x16_shl(i8x16, i32) -> i8x16 {
block0(v0: i8x16, v1: i32):
@@ -1884,3 +1885,78 @@ block0(v0: i64x2):
; popq %rbp
; retq
function %i8x16_abs(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iabs v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpabsb %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpabsb %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %i16x8_abs(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iabs v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpabsw %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpabsw %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %i32x4_abs(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iabs v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpabsd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpabsd %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

View File

@@ -0,0 +1,154 @@
test compile precise-output
set enable_simd
target x86_64 has_avx
function %sload8x8(i64) -> i16x8 {
block0(v0: i64):
v1 = sload8x8 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmovsxbw 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmovsxbw (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq
function %uload8x8(i64) -> i16x8 {
block0(v0: i64):
v1 = uload8x8 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmovzxbw 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmovzxbw (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq
function %sload16x4(i64) -> i32x4 {
block0(v0: i64):
v1 = sload16x4 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmovsxwd 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmovsxwd (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq
function %uload16x4(i64) -> i32x4 {
block0(v0: i64):
v1 = uload16x4 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmovzxwd 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmovzxwd (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq
function %sload32x2(i64) -> i64x2 {
block0(v0: i64):
v1 = sload32x2 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmovsxdq 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmovsxdq (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq
function %uload32x2(i64) -> i64x2 {
block0(v0: i64):
v1 = uload32x2 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmovzxdq 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmovzxdq (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq