x64: Lower shuffle and swizzle in ISLE (#4772)
Lower `shuffle` and `swizzle` in ISLE.
This PR surfaced a bug in the lowering of `shuffle` when avx512vl and avx512vbmi are enabled: we use `vpermi2b` as the implementation, but the lowering panics if the immediate shuffle mask contains any out-of-bounds values. When the avx512 extensions are not present, out-of-bounds values are instead turned into `0` in the result.
I've resolved this by detecting, in the avx512-enabled lowering, when the shuffle immediate has out-of-bounds indices, and generating an additional mask to zero out the lanes where those indices occur. This brings the avx512 case into line with the semantics of the `shuffle` op; see `cranelift/codegen/meta/src/shared/instructions.rs`, lines 1495-1498, at commit 94bcbe8446.
This commit is contained in:
@@ -15,13 +15,13 @@ block0:
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
-; load_const VCodeConstant(3), %xmm6
-; load_const VCodeConstant(2), %xmm0
-; load_const VCodeConstant(0), %xmm7
-; pshufb %xmm6, %xmm7, %xmm6
-; load_const VCodeConstant(1), %xmm10
-; pshufb %xmm0, %xmm10, %xmm0
-; orps %xmm0, %xmm6, %xmm0
+; load_const VCodeConstant(3), %xmm0
+; load_const VCodeConstant(2), %xmm5
+; load_const VCodeConstant(0), %xmm3
+; pshufb %xmm0, %xmm3, %xmm0
+; load_const VCodeConstant(1), %xmm7
+; pshufb %xmm5, %xmm7, %xmm5
+; por %xmm0, %xmm5, %xmm0
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
@@ -37,8 +37,8 @@ block0:
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; load_const VCodeConstant(1), %xmm0
|
||||
-; load_const VCodeConstant(0), %xmm4
-; pshufb %xmm0, %xmm4, %xmm0
+; load_const VCodeConstant(0), %xmm2
+; pshufb %xmm0, %xmm2, %xmm0
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
@@ -55,10 +55,10 @@ block0:
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; load_const VCodeConstant(1), %xmm0
|
||||
-; load_const VCodeConstant(1), %xmm5
-; load_const VCodeConstant(0), %xmm6
-; paddusb %xmm5, %xmm6, %xmm5
-; pshufb %xmm0, %xmm5, %xmm0
+; load_const VCodeConstant(1), %xmm3
+; load_const VCodeConstant(0), %xmm4
+; paddusb %xmm3, %xmm4, %xmm3
+; pshufb %xmm0, %xmm3, %xmm0
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
Reference in New Issue
Block a user