With this change we now reuse tests across multiple architectures. Duplicate tests were merged into a single file where possible; some legacy x86 tests were left in separate files because they are incompatible with the rest of the test suite.
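
Because the run harness executes a file's functions under every `target` line that matches the host architecture, a merged test can declare several targets in one header. A minimal sketch of such a header (the target names here are illustrative and depend on the Cranelift version in use):

test run
set enable_simd
target aarch64
target x86_64

The file below instead pins `target x86_64 legacy`, since these tests are among those still tied to the legacy x86 backend.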

test run
set enable_simd
target x86_64 legacy

function %shuffle_different_ssa_values() -> b1 {
block0:
    v0 = vconst.i8x16 0x00
    v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
    v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use lane 0 of v0 for every lane except the last, which takes lane 15 of v1 (index 31)
    v3 = extractlane.i8x16 v2, 15
    v4 = iconst.i8 42
    v5 = icmp eq v3, v4
    return v5
}
; run
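
; Shuffle indices address the 32 bytes of the two operands concatenated:
; 0-15 select from the first vector, 16-31 from the second, which is why
; index 31 above selects lane 15 of v1.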

function %shuffle_same_ssa_value() -> b1 {
block0:
    v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax
    v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes
    v2 = extractlane.i8x16 v1, 4
    v3 = iconst.i8 0x01
    v4 = icmp eq v2, v3
    return v4
}
; run

function %compare_shuffle() -> b1 {
block0:
    v1 = vconst.i32x4 [0 1 2 3]
    v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
    ; keep each lane in place from the first vector
    v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v4 = raw_bitcast.i32x4 v3
    v5 = extractlane.i32x4 v4, 3
    v6 = icmp_imm eq v5, 3
    v7 = extractlane.i32x4 v4, 0
    v8 = icmp_imm eq v7, 0
    v9 = band v6, v8
    return v9
}
; run

function %compare_shuffle_b32() -> b32 {
block0:
    v1 = vconst.b32x4 [true false true false]
    v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
    ; pair up the true values to make the entire vector true
    v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
    v4 = raw_bitcast.b32x4 v3
    v5 = extractlane v4, 3
    v6 = extractlane v4, 0
    v7 = band v5, v6
    return v7
}
; run

; TODO once SIMD vector comparison is implemented, remove use of extractlane below

function %insertlane_b8() -> b8 {
block0:
    v1 = bconst.b8 true
    v2 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
    v3 = insertlane v2, v1, 10
    v4 = extractlane v3, 10
    return v4
}
; run

function %insertlane_f32() -> b1 {
block0:
    v0 = f32const 0x42.42
    v1 = vconst.f32x4 0x00
    v2 = insertlane v1, v0, 1
    v3 = extractlane v2, 1
    v4 = fcmp eq v3, v0
    return v4
}
; run

function %insertlane_f64_lane1() -> b1 {
block0:
    v0 = f64const 0x42.42
    v1 = vconst.f64x2 0x00
    v2 = insertlane v1, v0, 1
    v3 = extractlane v2, 1
    v4 = fcmp eq v3, v0
    return v4
}
; run

function %insertlane_f64_lane0() -> b1 {
block0:
    v0 = f64const 0x42.42
    v1 = vconst.f64x2 0x00
    v2 = insertlane v1, v0, 0
    v3 = extractlane v2, 0
    v4 = fcmp eq v3, v0
    return v4
}
; run

function %extractlane_b8() -> b8 {
block0:
    v1 = vconst.b8x16 [false false false false false false false false false false true false false false false false]
    v2 = extractlane v1, 10
    return v2
}
; run

function %extractlane_i16() -> b1 {
block0:
    v0 = vconst.i16x8 0x00080007000600050004000300020001
    v1 = extractlane v0, 1
    v2 = icmp_imm eq v1, 2
    return v2
}
; run

function %extractlane_f32() -> b1 {
block0:
    v0 = f32const 0x42.42
    v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42]
    v2 = extractlane v1, 3
    v3 = fcmp eq v2, v0
    return v3
}
; run

function %extractlane_i32_with_vector_reuse() -> b1 {
block0:
    v0 = iconst.i32 42
    v1 = iconst.i32 99

    v2 = splat.i32x4 v0
    v3 = insertlane v2, v1, 2

    v4 = extractlane v3, 3
    v5 = icmp eq v4, v0

    v6 = extractlane v3, 2
    v7 = icmp eq v6, v1

    v8 = band v5, v7
    return v8
}
; run

function %extractlane_f32_with_vector_reuse() -> b1 {
block0:
    v0 = f32const 0x42.42
    v1 = f32const 0x99.99

    v2 = splat.f32x4 v0
    v3 = insertlane v2, v1, 2

    v4 = extractlane v3, 3
    v5 = fcmp eq v4, v0

    v6 = extractlane v3, 2
    v7 = fcmp eq v6, v1

    v8 = band v5, v7
    return v8
}
; run

function %swizzle() -> b1 {
block0:
    v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42]
    v2 = swizzle.i8x16 v0, v1 ; reverse the lanes; the out-of-range index 42 produces 0 in the last lane

    v3 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run

function %swizzle_with_overflow() -> b1 {
block0:
    v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    v2 = swizzle.i8x16 v0, v1 ; 250 overflows but saturates so that the MSB stays set (PSHUFB zeroes any lane whose control byte has its MSB set)

    v3 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run
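
; Note: the legacy x86 backend is believed to lower swizzle to a saturating add
; of 112 (0x70) followed by PSHUFB, so any index >= 16 ends up with its MSB set
; and the lane is zeroed (an assumption about this backend's lowering; at the
; CLIF level, swizzle only guarantees that out-of-range indices produce 0).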

function %unpack_low() -> b1 {
block0:
    v0 = vconst.i32x4 [0 1 2 3]
    v1 = vconst.i32x4 [4 5 6 7]
    v2 = x86_punpckl v0, v1

    v3 = vconst.i32x4 [0 4 1 5]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run
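
; x86_punpckl interleaves the low halves of its two operands, so lanes 0 and 1
; of each input alternate in the expected result above: [v0[0] v1[0] v0[1] v1[1]].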

function %snarrow(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
    v2 = snarrow v0, v1
    return v2
}
; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]
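; snarrow narrows with signed saturation: 0x0001ffff (131071) exceeds the i16
; range and saturates to 0x7fff, while 0xffffffff is -1 as an i32 and narrows
; losslessly to -1 (0xffff as an i16 bit pattern).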

function %unarrow(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
    v2 = unarrow v0, v1
    return v2
}
; run: %unarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 0 0xffff 4 5 0 0]
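; unarrow narrows with unsigned saturation of signed input: the negative lanes
; (-1, -6, and 0xffffffff, i.e. -1) clamp to 0, and 0x0001ffff saturates to 0xffff.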