x64: Add shuffle cases for punpck{h,l}bw (#5905)

* x64: Add `shuffle` cases for `punpck{h,l}bw`

I noticed this difference between LLVM and Cranelift for something I was
looking at recently, and while it's probably not all that common, I
figured I'd add it here since it should be somewhat useful nevertheless.

* Review feedback

* Use u128 extractor instead
This commit is contained in:
Alex Crichton
2023-03-01 15:49:00 -06:00
committed by GitHub
parent 6f6fcfa437
commit f05babc744
2 changed files with 64 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
test compile precise-output
set enable_simd
target x86_64
;; Shuffle whose mask, read least-significant byte first, is
;; 0x00, 0x10, 0x01, 0x11, ..., 0x07, 0x17. Indices 0x00-0x0f select lanes of
;; v0 and 0x10-0x1f select lanes of v1, so the result alternates the low eight
;; bytes of v0 with the low eight bytes of v1. The expectation that follows
;; checks that this lowers to a single `punpcklbw`.
;; NOTE(review): written with `;;` on the assumption that the precise-output
;; comparison only reads single-`;` comments -- confirm before merging.
function %punpcklbw(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle v0, v1, 0x17071606150514041303120211011000
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; punpcklbw %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; punpcklbw %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
;; Shuffle whose mask, read least-significant byte first, is
;; 0x08, 0x18, 0x09, 0x19, ..., 0x0f, 0x1f. Indices 0x00-0x0f select lanes of
;; v0 and 0x10-0x1f select lanes of v1, so the result alternates the high eight
;; bytes of v0 with the high eight bytes of v1. The expectation that follows
;; checks that this lowers to a single `punpckhbw`.
;; NOTE(review): written with `;;` on the assumption that the precise-output
;; comparison only reads single-`;` comments -- confirm before merging.
function %punpckhbw(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle v0, v1, 0x1f0f1e0e1d0d1c0c1b0b1a0a19091808
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; punpckhbw %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; punpckhbw %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq