194 lines
4.6 KiB
Plaintext
194 lines
4.6 KiB
Plaintext
test run
|
|
set enable_simd
|
|
|
|
function %shuffle_different_ssa_values() -> b1 {
    ; Shuffle two distinct vectors and confirm that the final lane of the
    ; result comes from the second operand.
block0:
    v0 = iconst.i8 42                                                  ; value expected in lane 15
    v1 = vconst.i8x16 0x00                                             ; first operand: all zeroes
    v2 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]               ; second operand: 42 in lane 15
    ; mask index 0 selects lane 0 of the first operand; index 31 selects lane 15 of the second operand
    v3 = shuffle v1, v2, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31]
    v4 = extractlane.i8x16 v3, 15
    v5 = icmp eq v4, v0
    return v5
}
|
|
; run
|
|
|
|
function %shuffle_same_ssa_value() -> b1 {
    ; Shuffle a vector with itself, broadcasting its last lane to every lane.
block0:
    v0 = iconst.i8 0x01                                                ; value expected in every lane
    ; in hexadecimal syntax the left-most byte is lane 15, so only lane 15 holds 0x01
    v1 = vconst.i8x16 0x01000000_00000000_00000000_00000000
    ; every mask byte is 0x0f, i.e. each result lane is taken from lane 15 of the first operand
    v2 = shuffle v1, v1, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f
    v3 = extractlane.i8x16 v2, 4                                       ; sample one of the filled lanes
    v4 = icmp eq v3, v0
    return v4
}
|
|
; run
|
|
|
|
function %compare_shuffle() -> b1 {
    ; Identity shuffle of an i32x4 vector: lanes 0 and 3 must keep their values.
block0:
    v0 = vconst.i32x4 [0 1 2 3]
    v1 = raw_bitcast.i8x16 v0                                          ; shuffle is type-limited to Tx16, hence the cast
    ; each mask index names its own position, so every byte stays in place
    v2 = shuffle v1, v1, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v3 = raw_bitcast.i32x4 v2

    v4 = extractlane.i32x4 v3, 0
    v5 = icmp_imm eq v4, 0

    v6 = extractlane.i32x4 v3, 3
    v7 = icmp_imm eq v6, 3

    v8 = band v7, v5                                                   ; both lanes must match
    return v8
}
|
|
; run
|
|
|
|
function %compare_shuffle() -> b32 {
    ; Shuffle a boolean vector so that only its true lanes are kept.
    ; NOTE(review): an earlier i32x4 test in this file is also named
    ; %compare_shuffle; a distinct name would make failures easier to attribute.
block0:
    v0 = vconst.b32x4 [true false true false]
    v1 = raw_bitcast.b8x16 v0                                          ; shuffle is type-limited to Tx16, hence the cast
    ; bytes 0-3 (lane 0, true) and bytes 8-11 (lane 2, true) are each used twice,
    ; so all four 32-bit lanes of the result are true
    v2 = shuffle v1, v1, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
    v3 = raw_bitcast.b32x4 v2
    v4 = extractlane.b32x4 v3, 0
    v5 = extractlane.b32x4 v3, 3
    v6 = band v5, v4
    return v6
}
|
|
; run
|
|
|
|
; TODO: once SIMD vector comparison is implemented, remove the use of extractlane in the tests below
|
|
|
|
function %insertlane_b8() -> b8 {
    ; Insert `true` into lane 10 of an all-false vector and read it back.
block0:
    v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
    v1 = bconst.b8 true
    v2 = insertlane.b8x16 v0, 10, v1
    v3 = extractlane.b8x16 v2, 10                                      ; the same lane that was just written
    return v3
}
|
|
; run
|
|
|
|
function %insertlane_f32() -> b1 {
    ; Insert an f32 into lane 1 of a zeroed vector and read it back.
block0:
    v0 = vconst.f32x4 0x00
    v1 = f32const 0x42.42
    v2 = insertlane.f32x4 v0, 1, v1
    v3 = extractlane.f32x4 v2, 1
    v4 = fcmp eq v3, v1                                                ; the extracted lane must equal the inserted scalar
    return v4
}
|
|
; run
|
|
|
|
function %insertlane_f64_lane1() -> b1 {
    ; Insert an f64 into the upper lane (1) of a zeroed vector and read it back.
block0:
    v0 = vconst.f64x2 0x00
    v1 = f64const 0x42.42
    v2 = insertlane.f64x2 v0, 1, v1
    v3 = extractlane.f64x2 v2, 1
    v4 = fcmp eq v3, v1                                                ; the extracted lane must equal the inserted scalar
    return v4
}
|
|
; run
|
|
|
|
function %insertlane_f64_lane0() -> b1 {
    ; Insert an f64 into the lower lane (0) of a zeroed vector and read it back.
block0:
    v0 = vconst.f64x2 0x00
    v1 = f64const 0x42.42
    v2 = insertlane.f64x2 v0, 0, v1
    v3 = extractlane.f64x2 v2, 0
    v4 = fcmp eq v3, v1                                                ; the extracted lane must equal the inserted scalar
    return v4
}
|
|
; run
|
|
|
|
function %extractlane_b8() -> b8 {
    ; Extract the single true lane (index 10) from an otherwise false vector.
block0:
    v0 = vconst.b8x16 [false false false false false false false false false false true false false false false false]
    v1 = extractlane.b8x16 v0, 10
    return v1
}
|
|
; run
|
|
|
|
function %extractlane_i16() -> b1 {
    ; Extract lane 1 from an i16x8 constant written in hexadecimal syntax.
block0:
    ; the right-most 16 bits (0x0001) are lane 0, so lane 1 holds 0x0002
    v0 = vconst.i16x8 0x00080007000600050004000300020001
    v1 = extractlane.i16x8 v0, 1
    v2 = icmp_imm eq v1, 2
    return v2
}
|
|
; run
|
|
|
|
function %extractlane_f32() -> b1 {
    ; Extract the only non-zero lane (index 3) of an f32x4 constant.
block0:
    v0 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42]
    v1 = extractlane.f32x4 v0, 3
    v2 = f32const 0x42.42                                              ; scalar the lane is compared against
    v3 = fcmp eq v1, v2
    return v3
}
|
|
; run
|
|
|
|
function %extractlane_i32_with_vector_reuse() -> b1 {
    ; Build [42 42 99 42] and read two lanes from the same vector value, so
    ; the vector is used by more than one extractlane.
block0:
    v0 = iconst.i32 42
    v1 = splat.i32x4 v0                                                ; every lane starts as 42
    v2 = iconst.i32 99
    v3 = insertlane.i32x4 v1, 2, v2                                    ; overwrite lane 2 only

    ; lane 2 must hold the inserted value
    v4 = extractlane.i32x4 v3, 2
    v5 = icmp eq v4, v2

    ; lane 3 must still hold the splatted value
    v6 = extractlane.i32x4 v3, 3
    v7 = icmp eq v6, v0

    v8 = band v7, v5
    return v8
}
|
|
; run
|
|
|
|
function %extractlane_f32_with_vector_reuse() -> b1 {
    ; Floating-point variant: splat one scalar, overwrite lane 2 with another,
    ; then read two lanes from the same vector value.
block0:
    v0 = f32const 0x42.42
    v1 = splat.f32x4 v0                                                ; every lane starts as v0
    v2 = f32const 0x99.99
    v3 = insertlane.f32x4 v1, 2, v2                                    ; overwrite lane 2 only

    ; lane 2 must hold the inserted value
    v4 = extractlane.f32x4 v3, 2
    v5 = fcmp eq v4, v2

    ; lane 3 must still hold the splatted value
    v6 = extractlane.f32x4 v3, 3
    v7 = fcmp eq v6, v0

    v8 = band v7, v5
    return v8
}
|
|
; run
|
|
|
|
function %swizzle() -> b1 {
    ; Reverse the lanes of a vector with swizzle; the final index (42) is out
    ; of range and must produce a zero lane.
block0:
    ; Lane 0 is deliberately non-zero (100) and is never selected by an
    ; in-range index below, so a zero in the result can only come from the
    ; out-of-range handling and not from reading lane 0.
    v0 = vconst.i8x16 [100 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42]
    v2 = swizzle.i8x16 v0, v1 ; reverse the lanes; the over-large index 42 yields 0

    v3 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
|
|
; run
|
|
|
|
function %swizzle_with_overflow() -> b1 {
    ; Swizzle with out-of-range indices (16 and 250) next to in-range index 0.
    ; Out-of-range indices must give zero lanes; index 0 must give lane 0.
block0:
    ; Lane 0 is non-zero (100) so that selecting lane 0 and zero-filling an
    ; out-of-range lane produce different, distinguishable results.
    v0 = vconst.i8x16 [100 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    ; 16 is the smallest out-of-range index. 250 would overflow when the index
    ; is adjusted, so the adjustment should saturate and keep the MSB set
    ; (PSHUFB zeroes a lane whose index has the MSB set).
    ; NOTE(review): the lowering itself is not visible in this file - confirm.
    v2 = swizzle.i8x16 v0, v1

    v3 = vconst.i8x16 [0 0 100 100 100 100 100 100 100 100 100 100 100 100 100 100]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
|
|
; run
|