Add x86 implementation of shuffle
This commit is contained in:
31
cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif
Normal file
31
cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif
Normal file
@@ -0,0 +1,31 @@
|
||||
test legalizer
|
||||
set enable_simd
|
||||
target x86_64 skylake
|
||||
|
||||
function %test_shuffle_different_ssa_values() -> i8x16 {
|
||||
ebb0:
|
||||
v0 = vconst.i8x16 0x00
|
||||
v1 = vconst.i8x16 0x01
|
||||
v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: v1 = vconst.i8x16 0x01
|
||||
; nextln: v3 = vconst.i8x16 0x80000000000000000000000000000000
|
||||
; nextln: v4 = x86_pshufb v0, v3
|
||||
; nextln: v5 = vconst.i8x16 0x01808080808080808080808080808080
|
||||
; nextln: v6 = x86_pshufb v1, v5
|
||||
; nextln: v2 = bor v4, v6
|
||||
|
||||
|
||||
|
||||
function %test_shuffle_same_ssa_value() -> i8x16 {
|
||||
ebb0:
|
||||
v1 = vconst.i8x16 0x01
|
||||
v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: v1 = vconst.i8x16 0x01
|
||||
; nextln: v3 = vconst.i8x16 0x03000000000000000000000000000000
|
||||
; nextln: v2 = x86_pshufb v1, v3
|
||||
44
cranelift/filetests/filetests/isa/x86/shuffle-run.clif
Normal file
44
cranelift/filetests/filetests/isa/x86/shuffle-run.clif
Normal file
@@ -0,0 +1,44 @@
|
||||
test run
|
||||
set enable_simd
|
||||
|
||||
function %test_shuffle_different_ssa_values() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.i8x16 0x00
|
||||
v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
|
||||
v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1
|
||||
v3 = extractlane.i8x16 v2, 15
|
||||
v4 = iconst.i8 42
|
||||
v5 = icmp eq v3, v4
|
||||
return v5
|
||||
}
|
||||
|
||||
; run
|
||||
|
||||
function %test_shuffle_same_ssa_value() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax
|
||||
v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes
|
||||
v2 = extractlane.i8x16 v1, 4
|
||||
v3 = iconst.i8 0x01
|
||||
v4 = icmp eq v2, v3
|
||||
return v4
|
||||
}
|
||||
|
||||
; run
|
||||
|
||||
function %compare_shuffle() -> b1 {
|
||||
ebb0:
|
||||
v1 = vconst.i32x4 [0 1 2 3]
|
||||
v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
|
||||
; keep each lane in place from the first vector
|
||||
v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
|
||||
v4 = raw_bitcast.i32x4 v3
|
||||
v5 = extractlane.i32x4 v4, 3
|
||||
v6 = icmp_imm eq v5, 3
|
||||
v7 = extractlane.i32x4 v4, 0
|
||||
v8 = icmp_imm eq v7, 0
|
||||
v9 = band v6, v8
|
||||
return v9
|
||||
}
|
||||
|
||||
; run
|
||||
@@ -1,6 +1,5 @@
|
||||
test rodata
|
||||
set enable_simd=true
|
||||
set probestack_enabled=false
|
||||
target x86_64 haswell
|
||||
|
||||
function %test_vconst_i32() -> i32x4 {
|
||||
|
||||
Reference in New Issue
Block a user