x64: Add shuffle cases for punpck{h,l}bw (#5905)
* x64: Add `shuffle` cases for `punpck{h,l}bw`
I noticed this difference between LLVM and Cranelift for something I was
looking at recently, and while it's probably not all that common I
figured I'd add it here since it should be somewhat useful nevertheless.
* Review feedback
* Use u128 extractor instead
This commit is contained in:
@@ -3510,6 +3510,16 @@
|
|||||||
|
|
||||||
;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Special case for the `punpckhbw` instruction which interleaves the upper
|
||||||
|
;; lanes of the two input registers.
;;
;; Reading the immediate below from its least-significant byte upward, the
;; selected byte indices are 0x08, 0x18, 0x09, 0x19, ..., 0x0f, 0x1f: indices
;; 0x08-0x0f fill the even positions and 0x18-0x1f fill the odd positions.
;; Only this exact immediate matches; any other shuffle mask falls through to
;; the other `shuffle` rules.
|
||||||
|
(rule 4 (lower (shuffle a b (u128_from_immediate 0x1f0f_1e0e_1d0d_1c0c_1b0b_1a0a_1909_1808)))
|
||||||
|
(x64_punpckhbw a b))
|
||||||
|
|
||||||
|
;; Special case for the `punpcklbw` instruction which interleaves the lower
|
||||||
|
;; lanes of the two input registers.
;;
;; Reading the immediate below from its least-significant byte upward, the
;; selected byte indices are 0x00, 0x10, 0x01, 0x11, ..., 0x07, 0x17: indices
;; 0x00-0x07 fill the even positions and 0x10-0x17 fill the odd positions.
;; Only this exact immediate matches; any other shuffle mask falls through to
;; the other `shuffle` rules.
|
||||||
|
(rule 4 (lower (shuffle a b (u128_from_immediate 0x1707_1606_1505_1404_1303_1202_1101_1000)))
|
||||||
|
(x64_punpcklbw a b))
|
||||||
|
|
||||||
;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
|
;; If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
|
||||||
;; register. We statically build `constructed_mask` to zero out any unknown lane
|
;; register. We statically build `constructed_mask` to zero out any unknown lane
|
||||||
;; indices (may not be completely necessary: verification could fail incorrect
|
;; indices (may not be completely necessary: verification could fail incorrect
|
||||||
|
|||||||
54
cranelift/filetests/filetests/isa/x64/shuffle.clif
Normal file
54
cranelift/filetests/filetests/isa/x64/shuffle.clif
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
test compile precise-output
|
||||||
|
set enable_simd
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function %punpcklbw(i8x16, i8x16) -> i8x16 {
|
||||||
|
block0(v0: i8x16, v1: i8x16):
|
||||||
|
v2 = shuffle v0, v1, 0x17071606150514041303120211011000
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; punpcklbw %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; punpcklbw %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
|
function %punpckhbw(i8x16, i8x16) -> i8x16 {
|
||||||
|
block0(v0: i8x16, v1: i8x16):
|
||||||
|
v2 = shuffle v0, v1, 0x1f0f1e0e1d0d1c0c1b0b1a0a19091808
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; VCode:
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; punpckhbw %xmm0, %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
;
|
||||||
|
; Disassembled:
|
||||||
|
; block0: ; offset 0x0
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block1: ; offset 0x4
|
||||||
|
; punpckhbw %xmm1, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; retq
|
||||||
|
|
||||||
Reference in New Issue
Block a user