* x64: Add non-SSE4.1 lowerings of `pmov{s,z}x*`
This commit adds lowerings for a suite of sign/zero extension
instructions which don't require SSE4.1. Like before these lowerings are
based on LLVM's output.
This commit also deletes special cases for `i16x8.extmul_{low,high}_*`
since the output of the special case is the same as the default lowering
of all the component instructions used within as well.
* Remove SSE4.1 specialization of `uwiden_high`
LLVM prefers the `punpckh*`-based lowerings and at least according to
`llvm-mca` these are slightly better cycle-wise too.
31 lines
683 B
Plaintext
31 lines
683 B
Plaintext
; Runtime tests for `swiden_low`: every function below is checked through its
; `; run:` line under each test mode and target listed here.
; NOTE(review): directive order is preserved exactly as found; `set enable_simd`
; presumably applies only to the targets declared after it -- confirm before
; reordering.
test interpret
test run
target aarch64
target s390x
target x86_64 ssse3 has_sse41=false
set enable_simd
target x86_64
target x86_64 sse41
target x86_64 sse41 has_avx

; Applies `swiden_low` to an i8x16 vector, producing an i16x8 result.
; The run line expects the first eight input lanes back, negative values kept.
function %swidenlow_i8x16(i8x16) -> i16x8 {
block0(v0: i8x16):
    v1 = swiden_low v0
    return v1
}
; run: %swidenlow_i8x16([1 -2 3 -4 5 -6 7 -8 9 -10 11 -12 13 -14 15 -16]) == [1 -2 3 -4 5 -6 7 -8]

; Applies `swiden_low` to an i16x8 vector, producing an i32x4 result.
; The run line expects the first four input lanes back, negative values kept.
function %swidenlow_i16x8(i16x8) -> i32x4 {
block0(v0: i16x8):
    v1 = swiden_low v0
    return v1
}
; run: %swidenlow_i16x8([1 -2 3 -4 5 -6 7 -8]) == [1 -2 3 -4]

; Applies `swiden_low` to an i32x4 vector, producing an i64x2 result.
; The run line expects the first two input lanes back, negative values kept.
function %swidenlow_i32x4(i32x4) -> i64x2 {
block0(v0: i32x4):
    v1 = swiden_low v0
    return v1
}
; run: %swidenlow_i32x4([1 -2 3 -4]) == [1 -2]
|