With this change, tests are now reused across multiple architectures: duplicate tests were merged into a single file where possible, while some legacy x86 tests remain in separate files due to incompatibilities with the rest of the test suite.
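
For example, a run test can now list several `target` lines so the same
functions execute on every listed backend. A minimal sketch of such a header,
with illustrative target names:

    test run
    set enable_simd
    target aarch64
    target x86_64

The file below is one of the legacy exceptions and still pins a single x86
target.
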
test run
set enable_simd
target x86_64 legacy skylake

; TODO: once available, replace all lane extraction with `icmp + all_ones`

function %ishl_i32x4() -> b1 {
block0:
    v0 = iconst.i32 1
    v1 = vconst.i32x4 [1 2 4 8]
    v2 = ishl v1, v0

    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 2

    v5 = extractlane v2, 3
    v6 = icmp_imm eq v5, 16

    v7 = band v4, v6
    return v7
}
; run

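; The `_too_large` tests below rely on a shift amount of at least the lane
; width shifting every bit out of the lane: e.g. 1 << 17 in an i16 lane moves
; all bits past the top of the lane, so each lane is expected to be 0.
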
function %ishl_too_large_i16x8() -> b1 {
block0:
    v0 = iconst.i32 17 ; note that this will shift off the end of each lane
    v1 = vconst.i16x8 [1 2 4 8 16 32 64 128]
    v2 = ishl v1, v0

    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 0

    v5 = extractlane v2, 3
    v6 = icmp_imm eq v5, 0

    v7 = band v4, v6
    return v7
}
; run

function %ushr_i8x16() -> b1 {
block0:
    v0 = iconst.i32 1
    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v2 = ushr v1, v0

    v3 = vconst.i8x16 [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run

function %sshr_i8x16() -> b1 {
block0:
    v0 = iconst.i32 1
    v1 = vconst.i8x16 [0 0xff 2 0xfd 4 0xfb 6 0xf9 8 0xf7 10 0xf5 12 0xf3 14 0xf1]
    v2 = sshr v1, v0

    v3 = vconst.i8x16 [0 0xff 1 0xfe 2 0xfd 3 0xfc 4 0xfb 5 0xfa 6 0xf9 7 0xf8]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run

function %ishl_i8x16() -> b1 {
block0:
    v0 = iconst.i32 1
    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
    v2 = ishl v1, v0

    v3 = vconst.i8x16 [0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run

function %ushr_i64x2() -> b1 {
block0:
    v0 = iconst.i32 1
    v1 = vconst.i64x2 [1 2]
    v2 = ushr v1, v0

    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 0

    v5 = extractlane v2, 1
    v6 = icmp_imm eq v5, 1

    v7 = band v4, v6
    return v7
}
; run

function %ushr_too_large_i32x4() -> b1 {
block0:
    v0 = iconst.i32 33 ; note that this will shift off the end of each lane
    v1 = vconst.i32x4 [1 2 4 8]
    v2 = ushr v1, v0

    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 0

    v5 = extractlane v2, 3
    v6 = icmp_imm eq v5, 0

    v7 = band v4, v6
    return v7
}
; run

function %sshr_i16x8() -> b1 {
block0:
    v0 = iconst.i32 1
    v1 = vconst.i16x8 [-1 2 4 8 -16 32 64 128]
    v2 = sshr v1, v0

    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 0xffff ; because of the shifted-in sign-bit, this remains 0xffff == -1

    v5 = extractlane v2, 4
    v6 = icmp_imm eq v5, 0xfff8 ; -16 has been shifted to -8 == 0xfff8

    v7 = band v4, v6
    return v7
}
; run

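; Unlike the logical shifts above, an arithmetic shift copies the sign bit
; into the vacated positions, so an over-wide sshr of a negative lane yields
; all ones (-1) rather than 0, as the next test checks.
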
function %sshr_too_large_i32x4() -> b1 {
block0:
    v0 = iconst.i32 33 ; note that this will shift off the end of each lane
    v1 = vconst.i32x4 [1 2 4 -8]
    v2 = sshr v1, v0

    v3 = extractlane v2, 0
    v4 = icmp_imm eq v3, 0

    v5 = extractlane v2, 3
    v6 = icmp_imm eq v5, 0xffff_ffff ; shifting in the sign-bit repeatedly fills the result with 1s

    v7 = band v4, v6
    return v7
}
; run

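; This variant takes its operands as run-time arguments: each `; run:` line
; below invokes the function with the given lane values and compares the
; returned vector against the expected lanes.
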
function %sshr_i64x2(i64x2, i32) -> i64x2 {
block0(v0: i64x2, v1: i32):
    v2 = sshr v0, v1
    return v2
}
; run: %sshr_i64x2([1 -1], 0) == [1 -1]
; run: %sshr_i64x2([1 -1], 1) == [0 -1] ; note the -1 shift result
; run: %sshr_i64x2([2 -2], 1) == [1 -1]
; run: %sshr_i64x2([0x80000000_00000000 0x7FFFFFFF_FFFFFFFF], 63) == [0xFFFFFFFF_FFFFFFFF 0]

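; bitselect picks each result bit from the first value where the selector bit
; is 1 and from the second value where it is 0: lane 0 (selector 0x00) comes
; entirely from v2 (42), and lane 15 (selector 0xff) entirely from v1 (also 42).
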
function %bitselect_i8x16() -> b1 {
block0:
    v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255] ; the selector vector
    v1 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] ; for each 1-bit in v0 the bit of v1 is selected
    v2 = vconst.i8x16 [42 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127] ; for each 0-bit in v0 the bit of v2 is selected
    v3 = bitselect v0, v1, v2

    v4 = extractlane v3, 0
    v5 = icmp_imm eq v4, 42

    v6 = extractlane v3, 1
    v7 = icmp_imm eq v6, 0

    v8 = extractlane v3, 15
    v9 = icmp_imm eq v8, 42

    v10 = band v5, v7
    v11 = band v10, v9
    return v11
}
; run

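; The `_imm` variants below encode the shift amount as an immediate in the
; instruction itself instead of passing it in a separate i32 value.
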
function %sshr_imm_i32x4() -> b1 {
block0:
    v1 = vconst.i32x4 [1 2 4 -8]
    v2 = sshr_imm v1, 1

    v3 = vconst.i32x4 [0 1 2 -4]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run

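; A logical (unsigned) shift does not preserve the sign bit: the -8 lane is
; 0xfff8 as an i16, so ushr by 1 gives 0x7ffc == 32764.
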
function %ushr_imm_i16x8() -> b1 {
block0:
    v1 = vconst.i16x8 [1 2 4 -8 0 0 0 0]
    v2 = ushr_imm v1, 1

    v3 = vconst.i16x8 [0 1 2 32764 0 0 0 0] ; -4 with MSB unset == 32764
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run

function %ishl_imm_i64x2() -> b1 {
block0:
    v1 = vconst.i64x2 [1 0]
    v2 = ishl_imm v1, 1

    v3 = vconst.i64x2 [2 0]
    v4 = icmp eq v2, v3
    v5 = vall_true v4
    return v5
}
; run