aarch64: Add specialized shuffle lowerings (#5977)
* aarch64: Add `shuffle` lowerings for the `uzp{1,2}` instructions
This commit uses the same style of patterns as the x64 backend to start
adding specific lowerings of the Cranelift `shuffle` instruction to
particular AArch64 instructions.
* aarch64: Add `shuffle` lowerings to the `zip{1,2}` instructions
These instructions match the `punpck*` family of instructions on x64 and
should help provide more efficient lowerings than the current `shuffle`
fallback.
* aarch64: Add `shuffle` lowerings for `trn{1,2}`
Along the lines of prior commits, this adds specific lowering patterns for
the individual AArch64 instructions available.
* aarch64: Add a `shuffle` lowering for the `ext` instruction
This instruction will more-or-less concatenate two 128-bit vector
registers to create a 256-bit value, shift it right, and then take the
lower 128 bits into the destination. This can be modeled with a
`shuffle` of consecutive bytes, so this adds a lowering rule to generate
this instruction.
* aarch64: Add `shuffle` special case for `dup`
This commit adds special cases for Cranelift's `shuffle` on AArch64 when
the lowering can be represented with a `dup` instruction which
broadcasts one vector's lane into all lanes of the destination.
* aarch64: Add `shuffle` specializations for `rev` instructions
This commit adds shuffle mask specializations for the `rev{16,32,64}`
family of instructions on AArch64 which can be used to reverse bytes,
16-bit values, or 32-bit values within larger values.
* Fix tests
* Add doc-comments in ISLE
This commit is contained in:
@@ -553,7 +553,8 @@
|
||||
(VecDupFromFpu
|
||||
(rd WritableReg)
|
||||
(rn Reg)
|
||||
(size VectorSize))
|
||||
(size VectorSize)
|
||||
(lane u8))
|
||||
|
||||
;; Duplicate FP immediate to vector.
|
||||
(VecDupFPImm
|
||||
@@ -1390,8 +1391,18 @@
|
||||
(Addp)
|
||||
;; Zip vectors (primary) [meaning, low halves]
|
||||
(Zip1)
|
||||
;; Zip vectors (secondary)
|
||||
(Zip2)
|
||||
;; Signed saturating rounding doubling multiply returning high half
|
||||
(Sqrdmulh)
|
||||
;; Unzip vectors (primary)
|
||||
(Uzp1)
|
||||
;; Unzip vectors (secondary)
|
||||
(Uzp2)
|
||||
;; Transpose vectors (primary)
|
||||
(Trn1)
|
||||
;; Transpose vectors (secondary)
|
||||
(Trn2)
|
||||
))
|
||||
|
||||
;; A Vector ALU operation which modifies a source register.
|
||||
@@ -1420,6 +1431,10 @@
|
||||
(Fneg)
|
||||
;; Floating-point square root
|
||||
(Fsqrt)
|
||||
;; Reverse elements in 16-bit lanes
|
||||
(Rev16)
|
||||
;; Reverse elements in 32-bit lanes
|
||||
(Rev32)
|
||||
;; Reverse elements in 64-bit doublewords
|
||||
(Rev64)
|
||||
;; Floating-point convert to signed integer, rounding toward zero
|
||||
@@ -1887,10 +1902,10 @@
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecDupFromFpu` instructions.
|
||||
(decl vec_dup_from_fpu (Reg VectorSize) Reg)
|
||||
(rule (vec_dup_from_fpu src size)
|
||||
(decl vec_dup_from_fpu (Reg VectorSize u8) Reg)
|
||||
(rule (vec_dup_from_fpu src size lane)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.VecDupFromFpu dst src size))))
|
||||
(_ Unit (emit (MInst.VecDupFromFpu dst src size lane))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.AluRRImm12` instructions.
|
||||
@@ -2386,6 +2401,14 @@
|
||||
(decl neg (Reg VectorSize) Reg)
|
||||
(rule (neg x size) (vec_misc (VecMisc2.Neg) x size))
|
||||
|
||||
;; Helper for generating `rev16` instructions.
|
||||
(decl rev16 (Reg VectorSize) Reg)
|
||||
(rule (rev16 x size) (vec_misc (VecMisc2.Rev16) x size))
|
||||
|
||||
;; Helper for generating `rev32` instructions.
|
||||
(decl rev32 (Reg VectorSize) Reg)
|
||||
(rule (rev32 x size) (vec_misc (VecMisc2.Rev32) x size))
|
||||
|
||||
;; Helper for generating `rev64` instructions.
|
||||
(decl rev64 (Reg VectorSize) Reg)
|
||||
(rule (rev64 x size) (vec_misc (VecMisc2.Rev64) x size))
|
||||
@@ -3767,3 +3790,27 @@
|
||||
(emit_side_effect (with_flags_side_effect
|
||||
(cmp (OperandSize.Size32) ridx jt_size)
|
||||
(jt_sequence ridx jt_info)))))
|
||||
|
||||
;; Helper for emitting the `uzp1` instruction
|
||||
(decl vec_uzp1 (Reg Reg VectorSize) Reg)
|
||||
(rule (vec_uzp1 rn rm size) (vec_rrr (VecALUOp.Uzp1) rn rm size))
|
||||
|
||||
;; Helper for emitting the `uzp2` instruction
|
||||
(decl vec_uzp2 (Reg Reg VectorSize) Reg)
|
||||
(rule (vec_uzp2 rn rm size) (vec_rrr (VecALUOp.Uzp2) rn rm size))
|
||||
|
||||
;; Helper for emitting the `zip1` instruction
|
||||
(decl vec_zip1 (Reg Reg VectorSize) Reg)
|
||||
(rule (vec_zip1 rn rm size) (vec_rrr (VecALUOp.Zip1) rn rm size))
|
||||
|
||||
;; Helper for emitting the `zip2` instruction
|
||||
(decl vec_zip2 (Reg Reg VectorSize) Reg)
|
||||
(rule (vec_zip2 rn rm size) (vec_rrr (VecALUOp.Zip2) rn rm size))
|
||||
|
||||
;; Helper for emitting the `trn1` instruction
|
||||
(decl vec_trn1 (Reg Reg VectorSize) Reg)
|
||||
(rule (vec_trn1 rn rm size) (vec_rrr (VecALUOp.Trn1) rn rm size))
|
||||
|
||||
;; Helper for emitting the `trn2` instruction
|
||||
(decl vec_trn2 (Reg Reg VectorSize) Reg)
|
||||
(rule (vec_trn2 rn rm size) (vec_rrr (VecALUOp.Trn2) rn rm size))
|
||||
|
||||
Reference in New Issue
Block a user