Files
wasmtime/cranelift/filetests/filetests/runtests/simd-lane-access-legacy.clif
Afonso Bordado 214755c6a0 cranelift: Merge all run tests into runtests dir
With this change we now reuse tests across multiple arches.

Duplicate tests were merged into the same file where possible.
Some legacy x86 tests were left in separate files due to incompatibilities with the rest of the test suite.
2021-06-07 14:44:11 +01:00

222 lines
5.2 KiB
Plaintext

test run
set enable_simd
target x86_64 legacy
; Checks that `shuffle` can draw lanes from two distinct SSA values: lanes 0-14 of the
; result copy lane 0 of the all-zero first operand, while lane 15 is picked by immediate
; index 31, i.e. the last lane of the second operand (which holds 42).
function %shuffle_different_ssa_values() -> b1 {
block0:
    v10 = vconst.i8x16 0x00
    v11 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
    ; index 0 is the first lane of v10; index 31 is the last lane of v11
    v12 = shuffle v10, v11, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31]
    v13 = extractlane v12, 15
    v14 = iconst.i8 42
    v15 = icmp eq v13, v14
    return v15
}
; run
; Checks `shuffle` when both operands are the same SSA value: immediate index 0x0f in
; every position broadcasts lane 15 of the input (0x01) to all sixteen lanes.
function %shuffle_same_ssa_value() -> b1 {
block0:
    ; note: in hexadecimal syntax lane 15 is the first byte written, so the leading 01
    ; is the value of lane 15
    v10 = vconst.i8x16 0x01000000_00000000_00000000_00000000
    v11 = shuffle v10, v10, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f
    ; every lane should now hold 0x01; lane 4 is the one sampled
    v12 = extractlane v11, 4
    v13 = iconst.i8 1
    v14 = icmp eq v12, v13
    return v14
}
; run
; Checks that an identity byte shuffle of an i32x4 vector (viewed as i8x16) leaves the
; 32-bit lanes unchanged; lanes 3 and 0 are sampled and both must match.
function %compare_shuffle() -> b1 {
block0:
    v0 = vconst.i32x4 [0 1 2 3]
    ; shuffle is type-limited to Tx16, hence the casts around it
    v1 = raw_bitcast.i8x16 v0
    ; byte i of the result is byte i of the first operand
    v2 = shuffle v1, v1, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v3 = raw_bitcast.i32x4 v2
    v4 = extractlane v3, 3
    v5 = icmp_imm eq v4, 3
    v6 = extractlane v3, 0
    v7 = icmp_imm eq v6, 0
    v8 = band v5, v7
    return v8
}
; run
; Boolean-vector variant of the shuffle test: the bytes of the two `true` lanes (0 and 2)
; of a b32x4 vector are duplicated over their `false` neighbours, so every 32-bit lane of
; the result should be true.
; NOTE(review): this function shares the name %compare_shuffle with the i32x4 test earlier
; in this file; consider giving it a distinct name so the two can be told apart.
function %compare_shuffle() -> b32 {
block0:
    v0 = vconst.b32x4 [true false true false]
    ; shuffle is type-limited to Tx16, hence the casts around it
    v1 = raw_bitcast.b8x16 v0
    ; bytes 0-3 (lane 0) fill lanes 0 and 1; bytes 8-11 (lane 2) fill lanes 2 and 3
    v2 = shuffle v1, v1, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
    v3 = raw_bitcast.b32x4 v2
    v4 = extractlane v3, 3
    v5 = extractlane v3, 0
    v6 = band v4, v5
    return v6
}
; run
; TODO: once SIMD vector comparison is implemented, remove the use of extractlane in the
; tests below.

; Checks that `true` inserted into lane 10 of an all-false b8x16 vector can be read back
; from that same lane.
function %insertlane_b8() -> b8 {
block0:
    v0 = bconst.b8 true
    v1 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
    v2 = insertlane v1, v0, 10
    v3 = extractlane v2, 10
    return v3
}
; run
; Checks that an f32 inserted into lane 1 of a zeroed f32x4 vector is read back unchanged.
function %insertlane_f32() -> b1 {
block0:
    v1 = f32const 0x42.42
    v2 = vconst.f32x4 0x00
    v3 = insertlane v2, v1, 1
    v4 = extractlane v3, 1
    ; the extracted lane must compare equal to the scalar that was inserted
    v5 = fcmp eq v4, v1
    return v5
}
; run
; Checks that an f64 inserted into the upper lane (lane 1) of a zeroed f64x2 vector is
; read back unchanged.
function %insertlane_f64_lane1() -> b1 {
block0:
    v1 = f64const 0x42.42
    v2 = vconst.f64x2 0x00
    v3 = insertlane v2, v1, 1
    v4 = extractlane v3, 1
    ; the extracted lane must compare equal to the scalar that was inserted
    v5 = fcmp eq v4, v1
    return v5
}
; run
; Checks that an f64 inserted into the lower lane (lane 0) of a zeroed f64x2 vector is
; read back unchanged.
function %insertlane_f64_lane0() -> b1 {
block0:
    v1 = f64const 0x42.42
    v2 = vconst.f64x2 0x00
    v3 = insertlane v2, v1, 0
    v4 = extractlane v3, 0
    ; the extracted lane must compare equal to the scalar that was inserted
    v5 = fcmp eq v4, v1
    return v5
}
; run
; Checks that extracting lane 10 of a b8x16 constant returns the single `true` stored there.
function %extractlane_b8() -> b8 {
block0:
    ; only lane 10 is true
    v0 = vconst.b8x16 [false false false false false false false false false false true false false false false false]
    v1 = extractlane v0, 10
    return v1
}
; run
; Checks that lane 1 of an i16x8 constant written in hexadecimal syntax reads back as 2.
function %extractlane_i16() -> b1 {
block0:
    ; in hexadecimal syntax lane 0 is the last 16-bit group written (0001), lane 1 is 0002
    v1 = vconst.i16x8 0x00080007000600050004000300020001
    v2 = extractlane v1, 1
    v3 = icmp_imm eq v2, 2
    return v3
}
; run
; Checks that the last lane (lane 3) of an f32x4 constant reads back equal to the same
; value materialised as a scalar constant.
function %extractlane_f32() -> b1 {
block0:
    v1 = f32const 0x42.42
    v2 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42]
    v3 = extractlane v2, 3
    v4 = fcmp eq v3, v1
    return v4
}
; run
; Checks that one i32x4 vector can feed several extractlane instructions: after splatting
; 42 and overwriting lane 2 with 99, lane 3 must still hold 42 and lane 2 must hold 99.
function %extractlane_i32_with_vector_reuse() -> b1 {
block0:
    v10 = iconst.i32 42
    v11 = iconst.i32 99
    v12 = splat.i32x4 v10
    v13 = insertlane v12, v11, 2
    ; first use of v13: an untouched lane keeps the splatted value
    v14 = extractlane v13, 3
    v15 = icmp eq v14, v10
    ; second use of v13: the overwritten lane holds the inserted value
    v16 = extractlane v13, 2
    v17 = icmp eq v16, v11
    v18 = band v15, v17
    return v18
}
; run
; Floating-point counterpart of the vector-reuse test: after splatting one f32 and
; overwriting lane 2 with another, lane 3 must still hold the splatted value and lane 2
; the inserted one, with the same vector feeding both extractlane instructions.
function %extractlane_f32_with_vector_reuse() -> b1 {
block0:
    v10 = f32const 0x42.42
    v11 = f32const 0x99.99
    v12 = splat.f32x4 v10
    v13 = insertlane v12, v11, 2
    ; first use of v13: an untouched lane keeps the splatted value
    v14 = extractlane v13, 3
    v15 = fcmp eq v14, v10
    ; second use of v13: the overwritten lane holds the inserted value
    v16 = extractlane v13, 2
    v17 = fcmp eq v16, v11
    v18 = band v15, v17
    return v18
}
; run
; Checks `swizzle` with a lane-reversing index vector whose last index (42) is out of
; range. An out-of-range index must produce 0; it must not select a lane. The table
; therefore holds a non-zero value in every lane (lane 0 holds 1), so a 0 in lane 15 of
; the result can only come from the out-of-range rule and not from reading lane 0.
function %swizzle() -> b1 {
block0:
    v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
    v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42]
    v2 = swizzle.i8x16 v0, v1 ; reverse the lanes; the over-large index 42 yields 0
    v3 = vconst.i8x16 [16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 0]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run
; Checks `swizzle` with out-of-range indices 16 (the smallest one) and 250 (large enough
; that adjusting the index must saturate instead of wrapping around). Both must yield 0.
; The table holds a non-zero value in every lane so that the in-range lanes (index 0,
; which reads 1) are distinguishable from the zeroed ones; with an all-zero expectation
; a swizzle that simply returned zeros would pass unnoticed.
function %swizzle_with_overflow() -> b1 {
block0:
    v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
    v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    v2 = swizzle.i8x16 v0, v1 ; 250 should overflow but saturate so that the MSB is set (PSHUFB zeroes a lane whose index byte has the MSB set)
    v3 = vconst.i8x16 [0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run
; Checks `x86_punpckl` on i32x4: the two low lanes of each operand are interleaved,
; giving [a0 b0 a1 b1].
function %unpack_low() -> b1 {
block0:
    v1 = vconst.i32x4 [0 1 2 3]
    v2 = vconst.i32x4 [4 5 6 7]
    v3 = x86_punpckl v1, v2
    ; expected interleaving of lanes 0 and 1 of both inputs
    v4 = vconst.i32x4 [0 4 1 5]
    v5 = icmp eq v3, v4
    v6 = vall_true v5
    return v6
}
; run
; Narrows two i32x4 vectors into one i16x8 with signed saturation; the lanes of the first
; operand come first in the result. In the invocation below, 0x0001ffff is too large for
; an i16 and clamps to 0x7fff, while 0xffffffff (-1) fits and becomes 0xffff.
function %snarrow(i32x4, i32x4) -> i16x8 {
block0(v10: i32x4, v11: i32x4):
    v12 = snarrow v10, v11
    return v12
}
; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]
; Narrows two i32x4 vectors into one i16x8 with unsigned saturation of signed inputs; the
; lanes of the first operand come first in the result. In the invocation below, negative
; lanes (-1, -6 and 0xffffffff) clamp to 0 and 0x0001ffff clamps to 0xffff.
function %unarrow(i32x4, i32x4) -> i16x8 {
block0(v10: i32x4, v11: i32x4):
    v12 = unarrow v10, v11
    return v12
}
; run: %unarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 0 0xffff 4 5 0 0]