cranelift: Merge all run tests into runtests dir

With this change, tests are now reused across multiple architectures.

Duplicate tests were merged into the same file where possible.
Some legacy x86 tests were left in separate files due to incompatibilities with the rest of the test suite.
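A merged test now lists every target it runs on in a single header, e.g. (as in the files below):

  test run
  target aarch64
  target s390x
  target x86_64 machinst
  target x86_64 legacy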
Afonso Bordado
2021-06-03 20:01:38 +01:00
parent e25bf362ab
commit 214755c6a0
43 changed files with 618 additions and 1115 deletions

View File

@@ -0,0 +1,17 @@
test run
target aarch64
target arm
target s390x
; target x86_64 machinst TODO: Not yet implemented on x86_64
target x86_64 legacy
function %bnot_band() -> b1 {
block0:
v1 = bconst.b1 false
v2 = bconst.b1 true
v3 = bnot v1
v4 = band v3, v2
return v4
}
; run

View File

@@ -0,0 +1,38 @@
test run
target aarch64
target arm
target s390x
target x86_64 machinst
target x86_64 legacy
function u0:0() -> b1 {
block0:
v0 = iconst.i8 0
brz v0, block1
jump block2
block1:
v1 = bconst.b1 true
return v1
block2:
v2 = bconst.b1 false
return v2
}
; run
function u0:1() -> b1 {
block0:
v0 = iconst.i8 0
brnz v0, block1
jump block2
block1:
v1 = bconst.b1 false
return v1
block2:
v2 = bconst.b1 true
return v2
}
; run

View File

@@ -0,0 +1,155 @@
test run
target aarch64
target arm
target s390x
target x86_64 machinst
target x86_64 legacy
function %i8_iconst_0() -> i8 {
block0:
v1 = iconst.i8 0
return v1
}
; run: %i8_iconst_0() == 0
function %i8_iconst_1() -> i8 {
block0:
v1 = iconst.i8 1
return v1
}
; run: %i8_iconst_1() == 1
function %i8_iconst_neg_one() -> i8 {
block0:
v1 = iconst.i8 -1
return v1
}
; run: %i8_iconst_neg_one() == -1
function %i16_iconst_0() -> i16 {
block0:
v1 = iconst.i16 0
return v1
}
; run: %i16_iconst_0() == 0
function %i16_iconst_1() -> i16 {
block0:
v1 = iconst.i16 1
return v1
}
; run: %i16_iconst_1() == 1
function %i16_iconst_neg_one() -> i16 {
block0:
v1 = iconst.i16 -1
return v1
}
; run: %i16_iconst_neg_one() == -1
function %i32_iconst_0() -> i32 {
block0:
v1 = iconst.i32 0
return v1
}
; run: %i32_iconst_0() == 0
function %i32_iconst_1() -> i32 {
block0:
v1 = iconst.i32 1
return v1
}
; run: %i32_iconst_1() == 1
function %i32_iconst_neg_one() -> i32 {
block0:
v1 = iconst.i32 -1
return v1
}
; run: %i32_iconst_neg_one() == -1
function %i64_iconst_0() -> i64 {
block0:
v1 = iconst.i64 0
return v1
}
; run: %i64_iconst_0() == 0
function %i64_iconst_1() -> i64 {
block0:
v1 = iconst.i64 1
return v1
}
; run: %i64_iconst_1() == 1
function %i64_iconst_neg_one() -> i64 {
block0:
v1 = iconst.i64 -1
return v1
}
; run: %i64_iconst_neg_one() == -1
function %b8_bconst_false() -> b8 {
block0:
v1 = bconst.b8 false
return v1
}
; run: %b8_bconst_false() == false
function %b8_bconst_true() -> b8 {
block0:
v1 = bconst.b8 true
return v1
}
; run: %b8_bconst_true() == true
function %b16_bconst_false() -> b16 {
block0:
v1 = bconst.b16 false
return v1
}
; run: %b16_bconst_false() == false
function %b16_bconst_true() -> b16 {
block0:
v1 = bconst.b16 true
return v1
}
; run: %b16_bconst_true() == true
function %b32_bconst_false() -> b32 {
block0:
v1 = bconst.b32 false
return v1
}
; run: %b32_bconst_false() == false
function %b32_bconst_true() -> b32 {
block0:
v1 = bconst.b32 true
return v1
}
; run: %b32_bconst_true() == true
function %b64_bconst_false() -> b64 {
block0:
v1 = bconst.b64 false
return v1
}
; run: %b64_bconst_false() == false
; this verifies that returning b64 immediates does not result in a segmentation fault, see https://github.com/bytecodealliance/cranelift/issues/911
function %b64_bconst_true() -> b64 {
block0:
v1 = bconst.b64 true
return v1
}
; run: %b64_bconst_true() == true

View File

@@ -0,0 +1,35 @@
test run
target aarch64
target arm
target s390x
set avoid_div_traps=false
target x86_64 machinst
function %i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = srem.i8 v0, v1
return v2
}
; run: %i8(0x80, 0xff) == 0
; run: %i8(0x2, 0x7) == 0x2
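; note: 0x80 % 0xff is i8::MIN % -1; the quotient overflows i8, but the remainder is defined and equal to 0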
function %i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = srem.i16 v0, v1
return v2
}
; run: %i16(0x8000, 0xffff) == 0
function %i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = srem.i32 v0, v1
return v2
}
; run: %i32(0x80000000, 0xffffffff) == 0
function %i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = srem.i64 v0, v1
return v2
}
; run: %i64(0x8000000000000000, 0xffffffffffffffff) == 0

View File

@@ -0,0 +1,30 @@
test run
target aarch64
target arm
target s390x
; target x86_64 machinst TODO: Not yet implemented on x86_64
target i686 legacy
function %uextend() -> b1 {
block0:
v0 = iconst.i32 0xffff_ee00
v1 = uextend.i64 v0
v2, v3 = isplit v1
v4 = icmp_imm eq v2, 0xffff_ee00
v5 = icmp_imm eq v3, 0
v6 = band v4, v5
return v6
}
; run
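; sextend (below) fills the high half with copies of the sign bit of 0xffff_ee00, i.e. 0xffff_ffff, whereas uextend above left it zero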
function %sextend() -> b1 {
block0:
v0 = iconst.i32 0xffff_ee00
v1 = sextend.i64 v0
v2, v3 = isplit v1
v4 = icmp_imm eq v2, 0xffff_ee00
v5 = icmp_imm eq v3, 0xffff_ffff
v6 = band v4, v5
return v6
}
; run

View File

@@ -0,0 +1,20 @@
test run
target x86_64 legacy haswell
function %test_imul_i128() -> b1 {
block0:
v11 = iconst.i64 0xf2347ac4503f1e24
v12 = iconst.i64 0x0098fe985354ab06
v1 = iconcat v11, v12
v21 = iconst.i64 0xf606ba453589ef89
v22 = iconst.i64 0x042e1f3054ca7432
v2 = iconcat v21, v22
v31 = iconst.i64 0xbe2044b2742ebd44
v32 = iconst.i64 0xa363ce3b6849f307
v3 = iconcat v31, v32
v4 = imul v1, v2
v5 = icmp eq v3, v4
return v5
}
; run

View File

@@ -0,0 +1,206 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst
; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's parser
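; Each i128 is passed and returned as two i64 halves, low half first (v = iconcat lo, hi; lo, hi = isplit v),
; so the run arguments and expected results below read as [lo, hi] pairs.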
function %add_i128(i64, i64, i64, i64) -> i64, i64 {
block0(v0: i64,v1: i64,v2: i64,v3: i64):
v4 = iconcat v0, v1
v5 = iconcat v2, v3
v6 = iadd v4, v5
v7, v8 = isplit v6
return v7, v8
}
; run: %add_i128(0, 0, 0, 0) == [0, 0]
; run: %add_i128(0, -1, -1, 0) == [-1, -1]
; run: %add_i128(1, 0, 0, 0) == [1, 0]
; run: %add_i128(1, 0, 1, 0) == [2, 0]
; run: %add_i128(1, 0, -1, -1) == [0, 0]
; run: %add_i128(-1, 0, 1, 0) == [0, 1]
; run: %add_i128(0x01234567_89ABCDEF, 0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210) == [-1, -1]
; run: %add_i128(0x06060606_06060606, 0xA00A00A0_0A00A00A, 0x30303030_30303030, 0x0BB0BB0B_B0BB0BB0) == [0x36363636_36363636, 0xABBABBAB_BABBABBA]
; run: %add_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF]
function %sub_i128(i64, i64, i64, i64) -> i64, i64 {
block0(v0: i64,v1: i64,v2: i64,v3: i64):
v4 = iconcat v0, v1
v5 = iconcat v2, v3
v6 = isub v4, v5
v7, v8 = isplit v6
return v7, v8
}
; run: %sub_i128(0, 0, 0, 0) == [0, 0]
; run: %sub_i128(1, 0, 1, 0) == [0, 0]
; run: %sub_i128(1, 0, 0, 0) == [1, 0]
; run: %sub_i128(0, 0, 1, 0) == [-1, -1]
; run: %sub_i128(0, 0, -1, -1) == [1, 0]
; run: %sub_i128(-1, -1, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210) == [0x01234567_89ABCDEF, 0x01234567_89ABCDEF]
; run: %sub_i128(0x36363636_36363636, 0xABBABBAB_BABBABBA, 0x30303030_30303030, 0x0BB0BB0B_B0BB0BB0) == [0x06060606_06060606, 0xA00A00A0_0A00A00A]
; run: %sub_i128(0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111, 0x1DCB1111_1DCB1111) == [0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE]
function %mul_i128(i64, i64, i64, i64) -> i64, i64 {
block0(v0: i64,v1: i64,v2: i64,v3: i64):
v4 = iconcat v0, v1
v5 = iconcat v2, v3
v6 = imul v4, v5
v7, v8 = isplit v6
return v7, v8
}
; run: %mul_i128(0, 0, 0, 0) == [0, 0]
; run: %mul_i128(1, 0, 1, 0) == [1, 0]
; run: %mul_i128(1, 0, 0, 0) == [0, 0]
; run: %mul_i128(0, 0, 1, 0) == [0, 0]
; run: %mul_i128(2, 0, 1, 0) == [2, 0]
; run: %mul_i128(2, 0, 2, 0) == [4, 0]
; run: %mul_i128(1, 0, -1, -1) == [-1, -1]
; run: %mul_i128(2, 0, -1, -1) == [-2, -1]
; run: %mul_i128(0x01010101_01010101, 0x01010101_01010101, 13, 0) == [0x0D0D0D0D_0D0D0D0D, 0x0D0D0D0D_0D0D0D0D]
; run: %mul_i128(13, 0, 0x01010101_01010101, 0x01010101_01010101) == [0x0D0D0D0D_0D0D0D0D, 0x0D0D0D0D_0D0D0D0D]
; run: %mul_i128(0x00000000_01234567, 0x89ABCDEF_00000000, 0x00000000_FEDCBA98, 0x76543210_00000000) == [0x0121FA00_23E20B28, 0xE2946058_00000000]
; run: %mul_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 0x5ECD38B5_9D1C2B7E]
function %ishl_i128_i8(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = iconcat v0, v1
v4 = ishl.i128 v3, v2
v5, v6 = isplit v4
return v5, v6
}
; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404]
; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202]
; run: %ishl_i128_i8(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404]
; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101]
; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101]
; run: %ishl_i128_i8(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000]
; run: %ishl_i128_i8(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001]
; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000]
; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000]
; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000]
; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202]
; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404]
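; the shift amount is taken modulo the 128-bit type width, which is why shifting by 128 is the
; identity and shifting by 129 or 130 behaves like shifting by 1 or 2 above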
function %ishl_i128_i128(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = iconcat v0, v1
v4 = uextend.i64 v2
v5 = iconcat v4, v4
v6 = ishl.i128 v3, v5
v7, v8 = isplit v6
return v7, v8
}
; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404]
; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202]
; run: %ishl_i128_i128(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404]
; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101]
; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101]
; run: %ishl_i128_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000]
; run: %ishl_i128_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001]
; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000]
; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000]
; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000]
; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202]
; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404]
function %ushr_i128_i8(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = iconcat v0, v1
v4 = ushr.i128 v3, v2
v5, v6 = isplit v4
return v5, v6
}
; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040]
; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000]
; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101]
; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101]
; run: %ushr_i128_i8(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000]
; run: %ushr_i128_i8(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001]
; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000]
; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000]
; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000]
; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080]
; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040]
function %ushr_i128_i128(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = iconcat v0, v1
v4 = uextend.i64 v2
v5 = iconcat v4, v4
v6 = ushr.i128 v3, v5
v7, v8 = isplit v6
return v7, v8
}
; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040]
; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000]
; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101]
; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101]
; run: %ushr_i128_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000]
; run: %ushr_i128_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001]
; run: %ushr_i128_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000]
; run: %ushr_i128_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000]
; run: %ushr_i128_i128(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000]
; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080]
; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040]
function %sshr_i128_i8(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = iconcat v0, v1
v4 = sshr.i128 v3, v2
v5, v6 = isplit v4
return v5, v6
}
; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040]
; run: %sshr_i128_i8(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff]
; run: %sshr_i128_i8(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff]
; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff]
; run: %sshr_i128_i8(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000]
; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010]
; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010]
; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080]
; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040]
function %sshr_i128_i128(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = iconcat v0, v1
v4 = uextend.i64 v2
v5 = iconcat v4, v4
v6 = sshr.i128 v3, v5
v7, v8 = isplit v6
return v7, v8
}
; run: %sshr_i128_i128(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040]
; run: %sshr_i128_i128(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff]
; run: %sshr_i128_i128(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff]
; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff]
; run: %sshr_i128_i128(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000]
; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010]
; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010]
; run: %sshr_i128_i128(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080]
; run: %sshr_i128_i128(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040]

View File

@@ -0,0 +1,27 @@
test run
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst
function %ctz(i64, i64) -> i8 {
block0(v0: i64, v1: i64):
v2 = iconcat v0, v1
v3 = ctz.i128 v2
v4 = ireduce.i8 v3
return v4
}
; run: %ctz(0x00000000_00000000, 0x00000001_00000000) == 96
; run: %ctz(0x00000000_00010000, 0x00000001_00000000) == 16
; run: %ctz(0x00000000_00010000, 0x00000000_00000000) == 16
; run: %ctz(0x00000000_00000000, 0x00000000_00000000) == 128
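; e.g. with lo=0 and hi=0x00000001_00000000 the lowest set bit is bit 32 of the high half,
; i.e. bit 96 of the i128, so ctz returns 96; an all-zero input yields the full width, 128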
function %clz(i64, i64) -> i8 {
block0(v0: i64, v1: i64):
v2 = iconcat v0, v1
v3 = clz.i128 v2
v4 = ireduce.i8 v3
return v4
}
; run: %clz(0x00000000_00000000, 0x00000001_00000000) == 31
; run: %clz(0x00000000_00010000, 0x00000001_00000000) == 31
; run: %clz(0x00000000_00010000, 0x00000000_00000000) == 111
; run: %clz(0x00000000_00000000, 0x00000000_00000000) == 128
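; e.g. the highest set bit of hi=0x00000001_00000000 is bit 96 of the i128, so clz returns 127 - 96 = 31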

View File

@@ -0,0 +1,47 @@
test run
target x86_64 machinst
target x86_64 legacy
function %reverse_bits_zero() -> b1 {
block0:
v0 = iconst.i64 0
v1 = iconcat v0, v0
v2 = bitrev.i128 v1
v3 = icmp eq v2, v1
return v3
}
; run
function %reverse_bits_one() -> b1 {
block0:
v0 = iconst.i64 0
v1 = iconst.i64 1
v2 = iconcat v0, v1
v3 = bitrev.i128 v2
v4 = iconst.i64 0x8000_0000_0000_0000
v5 = iconst.i64 0
v6 = iconcat v4, v5
v7 = icmp eq v3, v6
return v7
}
; run
function %reverse_bits() -> b1 {
block0:
v0 = iconst.i64 0x06AD_8667_69EC_41BA
v1 = iconst.i64 0x6C83_D81A_6E28_83AB
v2 = iconcat v0, v1
v3 = bitrev.i128 v2
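; bit-reversing 128 bits swaps the two halves and bit-reverses each, so the expected
; low half v4 is bitrev64(v1) and the expected high half v5 is bitrev64(v0):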
v4 = iconst.i64 0xD5C11476581BC136
v5 = iconst.i64 0x5D823796E661B560
v6 = iconcat v4, v5
v7 = icmp eq v3, v6
return v7
}
; run

View File

@@ -0,0 +1,42 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst
target x86_64 legacy
function %br_false() -> b1 {
block0:
v10 = iconst.i64 0x42
v11 = iconst.i64 0x00
v0 = iconcat v10, v11
brz v0, block2
jump block1
block1:
v1 = bconst.b1 true
return v1
block2:
v2 = bconst.b1 false
return v2
}
; run
function %br_true() -> b1 {
block0:
v10 = iconst.i64 0x00
v11 = iconst.i64 0x00
v0 = iconcat v10, v11
brz v0, block2
jump block1
block1:
v1 = bconst.b1 false
return v1
block2:
v2 = bconst.b1 true
return v2
}
; run

View File

@@ -0,0 +1,12 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst
function %i128_const_0() -> i64, i64 {
block0:
v1 = iconst.i128 0
v2, v3 = isplit v1
return v2, v3
}
; run: %i128_const_0() == [0, 0]

View File

@@ -0,0 +1,29 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst
target x86_64 legacy
function %i128_uextend() -> b1 {
block0:
v0 = iconst.i64 0xffff_ffff_eeee_0000
v1 = uextend.i128 v0
v2, v3 = isplit v1
v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000
v5 = icmp_imm eq v3, 0
v6 = band v4, v5
return v6
}
; run
function %i128_sextend() -> b1 {
block0:
v0 = iconst.i64 0xffff_ffff_eeee_0000
v1 = sextend.i128 v0
v2, v3 = isplit v1
v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000
v5 = icmp_imm eq v3, 0xffff_ffff_ffff_ffff
v6 = band v4, v5
return v6
}
; run

View File

@@ -0,0 +1,94 @@
test run
target x86_64 machinst
function %test_icmp_eq_i128() -> b1 {
block0:
v11 = iconst.i64 0x0
v12 = iconst.i64 0x0
v1 = iconcat v11, v12
v21 = iconst.i64 0x0
v22 = iconst.i64 0x0
v2 = iconcat v21, v22
v10 = icmp.i128 eq v1, v2
return v10
}
; run
function %test_icmp_imm_eq_i128() -> b1 {
block0:
v11 = iconst.i64 0x0
v12 = iconst.i64 0x0
v1 = iconcat v11, v12
v10 = icmp_imm.i128 eq v1, 0x0
return v10
}
; run
function %test_icmp_ne_i128() -> b1 {
block0:
v11 = iconst.i64 0x0
v12 = iconst.i64 0x0
v1 = iconcat v11, v12
v21 = iconst.i64 0x0
v22 = iconst.i64 0x1
v2 = iconcat v21, v22
v10 = icmp.i128 ne v1, v2
return v10
}
; run
function %test_icmp_imm_ne_i128() -> b1 {
block0:
v11 = iconst.i64 0x0
v12 = iconst.i64 0x0
v1 = iconcat v11, v12
v10 = icmp_imm.i128 ne v1, 0x1
return v10
}
; run
function %test_icmp_nz_eq_i128() -> b1 {
block0:
v11 = iconst.i64 0x1
v12 = iconst.i64 0x1
v1 = iconcat v11, v12
v21 = iconst.i64 0x1
v22 = iconst.i64 0x1
v2 = iconcat v21, v22
v10 = icmp.i128 eq v1, v2
return v10
}
; run
function %test_icmp_nz_gt_i128() -> b1 {
block0:
v11 = iconst.i64 0x1
v12 = iconst.i64 0x1
v1 = iconcat v11, v12
v21 = iconst.i64 0x1
v22 = iconst.i64 0x2
v2 = iconcat v21, v22
v10 = icmp.i128 ugt v2, v1
return v10
}
; run
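; the ugt case above is decided by the high halves alone: v2's high half (2) exceeds v1's (1)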
function %test_icmp_nz_ge_i128() -> b1 {
block0:
v11 = iconst.i64 0x1
v12 = iconst.i64 0x1
v1 = iconcat v11, v12
v21 = iconst.i64 0x1
v22 = iconst.i64 0x1
v2 = iconcat v21, v22
v10 = icmp.i128 uge v1, v2
return v10
}
; run

View File

@@ -0,0 +1,60 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst
function %rotl(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = iconcat v0, v1
v4 = rotl.i128 v3, v2
v5, v6 = isplit v4
return v5, v6
}
; run: %rotl(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020202, 0x02020202_02020202]
; run: %rotl(0x01010101_01010101, 0x01010101_01010101, 73) == [0x02020202_02020202, 0x02020202_02020202]
; run: %rotl(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202]
; run: %rotl(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303]
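; the 0x01-per-byte pattern has period 8 and 128 is a multiple of 8, so rotating by 9 or 73
; (both = 1 mod 8) acts like rotating by 1: each byte doubles to 0x02 for rotl, and becomes
; 0x80 for rotr below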
function %rotr(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = iconcat v0, v1
v4 = rotr.i128 v3, v2
v5, v6 = isplit v4
return v5, v6
}
; run: %rotr(0x01010101_01010101, 0x01010101_01010101, 9) == [0x80808080_80808080, 0x80808080_80808080]
; run: %rotr(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080]
; run: %rotr(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202]
; run: %rotr(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303]
function %rotl_amt_i128(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = uextend.i64 v2
v4 = iconcat v3, v3
v5 = iconcat v0, v1
v6 = rotl.i128 v5, v4
v7, v8 = isplit v6
return v7, v8
}
; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020202, 0x02020202_02020202]
; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x02020202_02020202, 0x02020202_02020202]
; run: %rotl_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202]
; run: %rotl_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303]
function %rotr_amt_i128(i64, i64, i8) -> i64, i64 {
block0(v0: i64, v1: i64, v2: i8):
v3 = uextend.i64 v2
v4 = iconcat v3, v3
v5 = iconcat v0, v1
v6 = rotr.i128 v5, v4
v7, v8 = isplit v6
return v7, v8
}
; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x80808080_80808080, 0x80808080_80808080]
; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080]
; run: %rotr_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202]
; run: %rotr_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303]

View File

@@ -0,0 +1,193 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
; target s390x TODO: Not yet implemented on s390x
set enable_simd
target x86_64 machinst skylake
set enable_simd
target x86_64 legacy skylake
function %iadd_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0:i32x4, v1:i32x4):
v2 = iadd v0, v1
return v2
}
; run: %iadd_i32x4([1 1 1 1], [1 2 3 4]) == [2 3 4 5]
function %iadd_i8x16_with_overflow() -> i8x16 {
block0:
v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
v2 = iadd v0, v1
return v2
}
; run: %iadd_i8x16_with_overflow() == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
function %isub_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = isub v0, v1
return v2
}
; run: %isub_i32x4([1 1 1 1], [1 2 3 4]) == [0 -1 -2 -3]
function %ineg_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = ineg v0
return v1
}
; run: %ineg_i32x4([1 1 1 1]) == [-1 -1 -1 -1]
function %imul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = imul v0, v1
return v2
}
; run: %imul_i64x2([0 2], [0 2]) == [0 4]
function %imul_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = imul v0, v1
return v2
}
; run: %imul_i32x4([-1 0 1 0x80_00_00_01], [2 2 2 2]) == [-2 0 2 2]
; Note above how bits are truncated: 0x80_00_00_01 * 2 == 0x1_00_00_00_02, but
; the leading 1 is dropped.
function %imul_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = imul v0, v1
return v2
}
; run: %imul_i16x8([-1 0 1 0x7f_ff 0 0 0 0], [2 2 2 2 0 0 0 0]) == [-2 0 2 0xff_fe 0 0 0 0]
function %sadd_sat_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = sadd_sat v0, v1
return v2
}
; run: %sadd_sat_i8x16([0x7f 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]) == [0x7f 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
function %uadd_sat_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = uadd_sat v0, v1
return v2
}
; run: %uadd_sat_i16x8([-1 0 0 0 0 0 0 0], [-1 1 1 1 1 1 1 1]) == [65535 1 1 1 1 1 1 1]
function %ssub_sat_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = ssub_sat v0, v1
return v2
}
; run: %ssub_sat_i8x16([0x80 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]) == [0x80 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff]
; Note that 0x80 == -128 and subtracting 1 from that should saturate.
function %usub_sat_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = usub_sat v0, v1
return v2
}
; run: %usub_sat_i8x16([0x80 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]) == [0x7f 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
function %add_sub_f32x4() -> b1 {
block0:
v0 = vconst.f32x4 [0x4.2 0.0 0.0 0.0]
v1 = vconst.f32x4 [0x1.0 0x1.0 0x1.0 0x1.0]
v2 = vconst.f32x4 [0x5.2 0x1.0 0x1.0 0x1.0]
v3 = fadd v0, v1
v4 = fcmp eq v3, v2
v6 = fsub v2, v1
v7 = fcmp eq v6, v0
v8 = band v4, v7
v9 = vall_true v8
return v9
}
; run
function %mul_div_f32x4() -> b1 {
block0:
v0 = vconst.f32x4 [0x4.2 -0x2.1 0x2.0 0.0]
v1 = vconst.f32x4 [0x3.4 0x6.7 0x8.9 0xa.b]
v2 = vconst.f32x4 [0xd.68 -0xd.47 0x11.2 0x0.0]
v3 = fmul v0, v1
v4 = fcmp eq v3, v2
v6 = fdiv v2, v1
v7 = fcmp eq v6, v0
v8 = band v4, v7
v9 = vall_true v8
return v9
}
; run
function %sqrt_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = sqrt v0
return v1
}
; run: %sqrt_f64x2([0x9.0 0x1.0]) == [0x3.0 0x1.0]
function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmax v0, v1
return v2
}
; note below how NaNs are quieted but (unlike fmin) retain their sign: this discrepancy is allowed by non-determinism
; in the spec, see https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
; run: %fmax_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [+0x0.0 0x1.0]
; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN]
; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0]
function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmin v0, v1
return v2
}
; note below how NaNs are quieted and negative: this is due to non-determinism in the spec for NaNs, see
; https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
; run: %fmin_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [-0x0.0 -0x1.0]
; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN -NaN]
; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [-NaN 0.0]
; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [-NaN 0.0]
function %fneg_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = fneg v0
return v1
}
; run: %fneg_f64x2([0x1.0 -0x1.0]) == [-0x1.0 0x1.0]
function %fneg_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = fneg v0
return v1
}
; run: %fneg_f32x4([0x0.0 -0x0.0 -Inf Inf]) == [-0x0.0 0x0.0 Inf -Inf]
function %fabs_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = fabs v0
return v1
}
; run: %fabs_f32x4([0x0.0 -0x1.0 0x2.0 -0x3.0]) == [0x0.0 0x1.0 0x2.0 0x3.0]
function %average_rounding_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i16x8([0 0 0 1 42 19 -1 0xffff], [0 1 2 4 42 18 -1 0]) == [0 1 1 3 42 19 -1 0x8000]
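; avg_round computes the unsigned rounding average (a + b + 1) >> 1 in each lane,
; e.g. lanes [0xffff, 0] average to 0x8000 instead of overflowing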
function %iabs(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iabs v0
return v1
}
; run: %iabs([-42 -1 0 1]) == [42 1 0 1]

View File

@@ -0,0 +1,44 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
set opt_level=speed_and_size
set enable_simd
target x86_64 machinst skylake
set opt_level=speed_and_size
set enable_simd
target x86_64 legacy haswell
;; x86_64 legacy: Test if bitselect->vselect optimization works properly
function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = icmp sge v0, v1
v3 = raw_bitcast.i32x4 v2
v4 = bitselect v3, v0, v1
return v4
}
; run: %mask_from_icmp([5 6 7 8], [1 10 20 7]) == [5 10 20 8]
function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 {
block0(v0: i64x2, v1: i64x2, v2: i32x4):
v3 = raw_bitcast.i64x2 v2
v4 = bitselect v3, v0, v1
return v4
}
; run: %mask_casted([0 0], [0xFFFFFF 0xFFFF4F], [0xFFF1 0 0xF 0]) == [0xFF000E 0xFFFF40]
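; %good_const_mask uses a constant mask in which every byte is 0x00 or 0xff, the shape the
; bitselect->vselect optimization noted above can lower to a byte-wise blend; %bad_const_mask
; mixes set and clear bits within bytes, so it must remain a genuine bitwise select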
function %good_const_mask(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = vconst.i32x4 [0x0000FF00 0x00FF00FF 0x00FF00FF 0xFF00FFFF]
v4 = bitselect v2, v0, v1
return v4
}
; run: %good_const_mask([0x1234 0x5678 0x1234 0x5678], [0xAAAA 0xAAAA 0xAAAA 0xAAAA]) == [0x12AA 0xAA78 0xAA34 0x5678]
function %bad_const_mask(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = vconst.i32x4 [0x0000FF00 0x00FF00FF 0x00FF000F 0xFF00FFF0]
v4 = bitselect v2, v0, v1
return v4
}
; run: %bad_const_mask([0x1234 0x5678 0x1234 0x5678], [0xAAAA 0xAAAA 0xAAAA 0xAAAA]) == [0x12AA 0xAA78 0xAAA4 0x567A]

View File

@@ -0,0 +1,214 @@
test run
set enable_simd
target x86_64 legacy skylake
; TODO: once available, replace all lane extraction with `icmp + all_ones`
function %ishl_i32x4() -> b1 {
block0:
v0 = iconst.i32 1
v1 = vconst.i32x4 [1 2 4 8]
v2 = ishl v1, v0
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 2
v5 = extractlane v2, 3
v6 = icmp_imm eq v5, 16
v7 = band v4, v6
return v7
}
; run
function %ishl_too_large_i16x8() -> b1 {
block0:
v0 = iconst.i32 17 ; note that this will shift off the end of each lane
v1 = vconst.i16x8 [1 2 4 8 16 32 64 128]
v2 = ishl v1, v0
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 0
v5 = extractlane v2, 3
v6 = icmp_imm eq v5, 0
v7 = band v4, v6
return v7
}
; run
function %ushr_i8x16() -> b1 {
block0:
v0 = iconst.i32 1
v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v2 = ushr v1, v0
v3 = vconst.i8x16 [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
function %sshr_i8x16() -> b1 {
block0:
v0 = iconst.i32 1
v1 = vconst.i8x16 [0 0xff 2 0xfd 4 0xfb 6 0xf9 8 0xf7 10 0xf5 12 0xf3 14 0xf1]
v2 = sshr v1, v0
v3 = vconst.i8x16 [0 0xff 1 0xfe 2 0xfd 3 0xfc 4 0xfb 5 0xfa 6 0xf9 7 0xf8]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
function %ishl_i8x16() -> b1 {
block0:
v0 = iconst.i32 1
v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v2 = ishl v1, v0
v3 = vconst.i8x16 [0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
function %ushr_i64x2() -> b1 {
block0:
v0 = iconst.i32 1
v1 = vconst.i64x2 [1 2]
v2 = ushr v1, v0
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 0
v5 = extractlane v2, 1
v6 = icmp_imm eq v5, 1
v7 = band v4, v6
return v7
}
; run
function %ushr_too_large_i32x4() -> b1 {
block0:
v0 = iconst.i32 33 ; note that this will shift off the end of each lane
v1 = vconst.i32x4 [1 2 4 8]
v2 = ushr v1, v0
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 0
v5 = extractlane v2, 3
v6 = icmp_imm eq v5, 0
v7 = band v4, v6
return v7
}
; run
function %sshr_i16x8() -> b1 {
block0:
v0 = iconst.i32 1
v1 = vconst.i16x8 [-1 2 4 8 -16 32 64 128]
v2 = sshr v1, v0
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 0xffff ; because of the shifted-in sign-bit, this remains 0xffff == -1
v5 = extractlane v2, 4
v6 = icmp_imm eq v5, 0xfff8 ; -16 has been shifted to -8 == 0xfff8
v7 = band v4, v6
return v7
}
; run
function %sshr_too_large_i32x4() -> b1 {
block0:
v0 = iconst.i32 33 ; note that this will shift off the end of each lane
v1 = vconst.i32x4 [1 2 4 -8]
v2 = sshr v1, v0
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 0
v5 = extractlane v2, 3
v6 = icmp_imm eq v5, 0xffff_ffff ; shifting in the sign-bit repeatedly fills the result with 1s
v7 = band v4, v6
return v7
}
; run
function %sshr_i64x2(i64x2, i32) -> i64x2 {
block0(v0:i64x2, v1:i32):
v2 = sshr v0, v1
return v2
}
; run: %sshr_i64x2([1 -1], 0) == [1 -1]
; run: %sshr_i64x2([1 -1], 1) == [0 -1] ; note the -1 shift result
; run: %sshr_i64x2([2 -2], 1) == [1 -1]
; run: %sshr_i64x2([0x80000000_00000000 0x7FFFFFFF_FFFFFFFF], 63) == [0xFFFFFFFF_FFFFFFFF 0]
function %bitselect_i8x16() -> b1 {
block0:
v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255] ; the selector vector
v1 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] ; for each 1-bit in v0 the bit of v1 is selected
v2 = vconst.i8x16 [42 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127] ; for each 0-bit in v0 the bit of v2 is selected
v3 = bitselect v0, v1, v2
v4 = extractlane v3, 0
v5 = icmp_imm eq v4, 42
v6 = extractlane v3, 1
v7 = icmp_imm eq v6, 0
v8 = extractlane v3, 15
v9 = icmp_imm eq v8, 42
v10 = band v5, v7
v11 = band v10, v9
return v11
}
; run
function %sshr_imm_i32x4() -> b1 {
block0:
v1 = vconst.i32x4 [1 2 4 -8]
v2 = sshr_imm v1, 1
v3 = vconst.i32x4 [0 1 2 -4]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
function %ushr_imm_i16x8() -> b1 {
block0:
v1 = vconst.i16x8 [1 2 4 -8 0 0 0 0]
v2 = ushr_imm v1, 1
v3 = vconst.i16x8 [0 1 2 32764 0 0 0 0] ; -4 with MSB unset == 32764
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
function %ishl_imm_i64x2() -> b1 {
block0:
v1 = vconst.i64x2 [1 0]
v2 = ishl_imm v1, 1
v3 = vconst.i64x2 [2 0]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run

View File

@@ -0,0 +1,132 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
set enable_simd
target x86_64 machinst skylake
function %bitselect_i8x16(i8x16, i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16, v2: i8x16):
v3 = bitselect v0, v1, v2
return v3
}
; Remember that bitselect accepts: 1) the selector vector, 2) the "if true" vector, and 3) the "if false" vector.
; run: %bitselect_i8x16([0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255], [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42], [42 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127]) == [42 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
function %vselect_i32x4(i32x4, i32x4) -> i32x4 {
block0(v1: i32x4, v2: i32x4):
; `make_trampoline` still does not know how to convert boolean vector types
; so we load the value directly here.
v0 = vconst.b32x4 [true true false false]
v3 = vselect v0, v1, v2
return v3
}
; Remember that vselect accepts: 1) the selector vector, 2) the "if true" vector, and 3) the "if false" vector.
; run: %vselect_i32x4([1 2 -1 -1], [-1 -1 3 4]) == [1 2 3 4]
; shift left
function %ishl_i8x16(i8x16, i32) -> i8x16 {
block0(v0: i8x16, v1: i32):
v2 = ishl v0, v1
return v2
}
; run: %ishl_i8x16([0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15], 4) == [0x00 0x10 0x20 0x30 0x40 0x50 0x60 0x70 0x80 0x90 0xa0 0xb0 0xc0 0xd0 0xe0 0xf0]
function %ishl_i16x8(i16x8, i32) -> i16x8 {
block0(v0: i16x8, v1: i32):
v2 = ishl v0, v1
return v2
}
; run: %ishl_i16x8([1 2 4 8 16 32 64 128], 17) == [0 0 0 0 0 0 0 0]
function %ishl_i32x4(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = ishl v0, v1
return v2
}
; run: %ishl_i32x4([1 2 4 8], 1) == [2 4 8 16]
function %ishl_imm_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v2 = ishl_imm v0, 1
return v2
}
; run: %ishl_imm_i64x2([1 0]) == [2 0]
; shift right (logical)
function %ushr_i8x16(i8x16, i32) -> i8x16 {
block0(v0: i8x16, v1: i32):
v2 = ushr v0, v1
return v2
}
; run: %ushr_i8x16([0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15], 1) == [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
function %ushr_i32x4(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = ushr v0, v1
return v2
}
; run: %ushr_i32x4([1 2 4 8], 33) == [0 0 0 0]
function %ushr_i64x2(i64x2, i32) -> i64x2 {
block0(v0: i64x2, v1: i32):
v2 = ushr v0, v1
return v2
}
; run: %ushr_i64x2([1 2], 1) == [0 1]
; shift right (arithmetic)
function %sshr_i8x16(i8x16, i32) -> i8x16 {
block0(v0: i8x16, v1: i32):
v2 = sshr v0, v1
return v2
}
; run: %sshr_i8x16([0 0xff 2 0xfd 4 0xfb 6 0xf9 8 0xf7 10 0xf5 12 0xf3 14 0xf1], 1) == [0 0xff 1 0xfe 2 0xfd 3 0xfc 4 0xfb 5 0xfa 6 0xf9 7 0xf8]
function %sshr_i16x8(i16x8, i32) -> i16x8 {
block0(v0: i16x8, v1: i32):
v2 = sshr v0, v1
return v2
}
; note: because of the shifted-in sign-bit, lane 0 remains -1 == 0xffff, whereas lane 4 has been shifted to -8 == 0xfff8
; run: %sshr_i16x8([-1 2 4 8 -16 32 64 128], 1) == [-1 1 2 4 -8 16 32 64]
function %sshr_i32x4(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = sshr v0, v1
return v2
}
; note: shifting in the sign-bit repeatedly in lane 3 fills the result with 1s (-1 == 0xffff_ffff)
; run: %sshr_i32x4([1 2 4 -8], 33) == [0 0 0 0xffff_ffff]
function %sshr_i64x2(i64x2, i32) -> i64x2 {
block0(v0:i64x2, v1:i32):
v2 = sshr v0, v1
return v2
}
; run: %sshr_i64x2([1 -1], 0) == [1 -1]
; run: %sshr_i64x2([1 -1], 1) == [0 -1] ; note the -1 shift result
; run: %sshr_i64x2([2 -2], 1) == [1 -1]
; run: %sshr_i64x2([0x80000000_00000000 0x7FFFFFFF_FFFFFFFF], 63) == [0xFFFFFFFF_FFFFFFFF 0]
function %sshr_imm_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = sshr_imm v0, 1
return v1
}
; run: %sshr_imm_i32x4([1 2 4 -8]) == [0 1 2 -4]
function %sshr_imm_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = sshr_imm v0, 1
return v1
}
; run: %sshr_imm_i16x8([1 2 4 -8 0 0 0 0]) == [0 1 2 -4 0 0 0 0]

View File

@@ -0,0 +1,44 @@
test run
set enable_simd
target x86_64 legacy
function %maxs_i8x16() -> b1 {
block0:
v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 will be greater than -1 == 0xff with
; signed max
v1 = vconst.i8x16 [0xff 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
v2 = x86_pmaxs v0, v1
v8 = vall_true v2
return v8
}
; run
function %maxu_i16x8() -> b1 {
block0:
v0 = vconst.i16x8 [0 1 1 1 1 1 1 1]
v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] ; -1 == 0xff will be greater with unsigned max
v2 = x86_pmaxu v0, v1
v8 = vall_true v2
return v8
}
; run
function %mins_i32x4() -> b1 {
block0:
v0 = vconst.i32x4 [0 1 1 1]
v1 = vconst.i32x4 [-1 1 1 1] ; -1 == 0xff will be less with signed min
v2 = x86_pmins v0, v1
v8 = vall_true v2
return v8
}
; run
function %minu_i8x16() -> b1 {
block0:
v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 < 2 with unsigned min
v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
v2 = x86_pminu v0, v1
v8 = vall_true v2
return v8
}
; run

View File

@@ -0,0 +1,210 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
; target s390x TODO: Not yet implemented on s390x
set enable_simd
target x86_64 machinst
set enable_simd
target x86_64 legacy
function %icmp_eq_i8x16() -> b8 {
block0:
v0 = vconst.i8x16 0x00
v1 = vconst.i8x16 0x00
v2 = icmp eq v0, v1
v3 = extractlane v2, 0
return v3
}
; run
function %icmp_eq_i64x2() -> b64 {
block0:
v0 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
v2 = icmp eq v0, v1
v3 = extractlane v2, 1
return v3
}
; run
function %icmp_ne_i32x4() -> b1 {
block0:
v0 = vconst.i32x4 [0 1 2 3]
v1 = vconst.i32x4 [7 7 7 7]
v2 = icmp ne v0, v1
v3 = vall_true v2
return v3
}
; run
function %icmp_ne_i16x8() -> b1 {
block0:
v0 = vconst.i16x8 [0 1 2 3 4 5 6 7]
v1 = vconst.i16x8 [0 1 2 3 4 5 6 7]
v2 = icmp ne v0, v1
v3 = vall_true v2
v4 = bint.i32 v3
v5 = icmp_imm eq v4, 0
return v5
}
; run
function %icmp_sgt_i8x16() -> b1 {
block0:
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v2 = icmp sgt v0, v1
v3 = raw_bitcast.i8x16 v2
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v7 = icmp eq v3, v4
v8 = vall_true v7
return v8
}
; run
function %icmp_sgt_i64x2() -> b1 {
block0:
v0 = vconst.i64x2 [0 -42]
v1 = vconst.i64x2 [-1 -43]
v2 = icmp sgt v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_ugt_i8x16() -> b1 {
block0:
v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
v1 = vconst.i8x16 [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
v2 = icmp ugt v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_sge_i16x8() -> b1 {
block0:
v0 = vconst.i16x8 [-1 1 2 3 4 5 6 7]
v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
v2 = icmp sge v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_uge_i32x4() -> b1 {
block0:
v0 = vconst.i32x4 [1 2 3 4]
v1 = vconst.i32x4 [1 1 1 1]
v2 = icmp uge v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_slt_i32x4() -> b1 {
block0:
v0 = vconst.i32x4 [-1 1 1 1]
v1 = vconst.i32x4 [1 2 3 4]
v2 = icmp slt v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_ult_i32x4() -> b1 {
block0:
v0 = vconst.i32x4 [1 1 1 1]
v1 = vconst.i32x4 [-1 2 3 4] ; -1 = 0xffff... will be greater than 1 when unsigned
v2 = icmp ult v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_ult_i16x8() -> b1 {
block0:
v0 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
v2 = icmp ult v0, v1
v3 = vconst.i16x8 0x00
v4 = raw_bitcast.i16x8 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
}
; run
function %icmp_sle_i16x8() -> b1 {
block0:
v0 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]
v1 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
v2 = icmp sle v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_ule_i16x8() -> b1 {
block0:
v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
v1 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]
v2 = icmp ule v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_eq_f32x4() -> b1 {
block0:
v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0]
v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0]
v2 = fcmp eq v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_lt_f32x4() -> b1 {
block0:
v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0]
v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0]
v2 = fcmp lt v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_ge_f64x2() -> b1 {
block0:
v0 = vconst.f64x2 [0x0.0 0x4.2]
v1 = vconst.f64x2 [0.0 0x4.1]
v2 = fcmp ge v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_uno_f64x2() -> b1 {
block0:
v0 = vconst.f64x2 [0.0 NaN]
v1 = vconst.f64x2 [NaN 0x4.1]
v2 = fcmp uno v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_gt_nans_f32x4() -> b1 {
block0:
v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN]
v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf]
v2 = fcmp gt v0, v1
; now check that the result v2 is all zeroes
v3 = vconst.i32x4 0x00
v4 = raw_bitcast.i32x4 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
}
; run

View File

@@ -0,0 +1,41 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
set enable_simd
target x86_64 machinst
set enable_simd
target x86_64 legacy
function %fcvt_from_sint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_sint.f32x4 v0
return v1
}
; run: %fcvt_from_sint([-1 0 1 123456789]) == [-0x1.0 0.0 0x1.0 0x75bcd18.0]
; Note that 123456789 rounds to 123456792.0, an error of 3
function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
; run: %fcvt_from_uint([0xFFFFFFFF 0 1 123456789]) == [0x100000000.0 0.0 0x1.0 0x75bcd18.0]
; Note that 0xFFFFFFFF is decimal 4,294,967,295 and is rounded up 1 to 4,294,967,296 in f32x4.
function %fcvt_to_sint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_sint_sat.i32x4 v0
return v1
}
; run: %fcvt_to_sint_sat([0x0.0 -0x1.0 0x1.0 0x1.0p100]) == [0 -1 1 0x7FFFFFFF]
; run: %fcvt_to_sint_sat([-0x8.1 0x0.0 0x0.0 -0x1.0p100]) == [-8 0 0 0x80000000]
function %fcvt_to_uint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_uint_sat.i32x4 v0
return v1
}
; run: %fcvt_to_uint_sat([0x1.0 0x4.2 0x4.6 0x1.0p100]) == [1 4 4 0xFFFFFFFF]
; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]

View File

@@ -0,0 +1,221 @@
test run
set enable_simd
target x86_64 legacy
function %shuffle_different_ssa_values() -> b1 {
block0:
v0 = vconst.i8x16 0x00
v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1
v3 = extractlane.i8x16 v2, 15
v4 = iconst.i8 42
v5 = icmp eq v3, v4
return v5
}
; run
function %shuffle_same_ssa_value() -> b1 {
block0:
v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax
v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes
v2 = extractlane.i8x16 v1, 4
v3 = iconst.i8 0x01
v4 = icmp eq v2, v3
return v4
}
; run
function %compare_shuffle() -> b1 {
block0:
v1 = vconst.i32x4 [0 1 2 3]
v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
; keep each lane in place from the first vector
v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v4 = raw_bitcast.i32x4 v3
v5 = extractlane.i32x4 v4, 3
v6 = icmp_imm eq v5, 3
v7 = extractlane.i32x4 v4, 0
v8 = icmp_imm eq v7, 0
v9 = band v6, v8
return v9
}
; run
function %compare_shuffle() -> b32 {
block0:
v1 = vconst.b32x4 [true false true false]
v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
; pair up the true values to make the entire vector true
v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
v4 = raw_bitcast.b32x4 v3
v5 = extractlane v4, 3
v6 = extractlane v4, 0
v7 = band v5, v6
return v7
}
; run
; TODO once SIMD vector comparison is implemented, remove use of extractlane below
function %insertlane_b8() -> b8 {
block0:
v1 = bconst.b8 true
v2 = vconst.b8x16 [false false false false false false false false false false false false false
false false false]
v3 = insertlane v2, v1, 10
v4 = extractlane v3, 10
return v4
}
; run
function %insertlane_f32() -> b1 {
block0:
v0 = f32const 0x42.42
v1 = vconst.f32x4 0x00
v2 = insertlane v1, v0, 1
v3 = extractlane v2, 1
v4 = fcmp eq v3, v0
return v4
}
; run
function %insertlane_f64_lane1() -> b1 {
block0:
v0 = f64const 0x42.42
v1 = vconst.f64x2 0x00
v2 = insertlane v1, v0, 1
v3 = extractlane v2, 1
v4 = fcmp eq v3, v0
return v4
}
; run
function %insertlane_f64_lane0() -> b1 {
block0:
v0 = f64const 0x42.42
v1 = vconst.f64x2 0x00
v2 = insertlane v1, v0, 0
v3 = extractlane v2, 0
v4 = fcmp eq v3, v0
return v4
}
; run
function %extractlane_b8() -> b8 {
block0:
v1 = vconst.b8x16 [false false false false false false false false false false true false false
false false false]
v2 = extractlane v1, 10
return v2
}
; run
function %extractlane_i16() -> b1 {
block0:
v0 = vconst.i16x8 0x00080007000600050004000300020001
v1 = extractlane v0, 1
v2 = icmp_imm eq v1, 2
return v2
}
; run
function %extractlane_f32() -> b1 {
block0:
v0 = f32const 0x42.42
v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42]
v2 = extractlane v1, 3
v3 = fcmp eq v2, v0
return v3
}
; run
function %extractlane_i32_with_vector_reuse() -> b1 {
block0:
v0 = iconst.i32 42
v1 = iconst.i32 99
v2 = splat.i32x4 v0
v3 = insertlane v2, v1, 2
v4 = extractlane v3, 3
v5 = icmp eq v4, v0
v6 = extractlane v3, 2
v7 = icmp eq v6, v1
v8 = band v5, v7
return v8
}
; run
function %extractlane_f32_with_vector_reuse() -> b1 {
block0:
v0 = f32const 0x42.42
v1 = f32const 0x99.99
v2 = splat.f32x4 v0
v3 = insertlane v2, v1, 2
v4 = extractlane v3, 3
v5 = fcmp eq v4, v0
v6 = extractlane v3, 2
v7 = fcmp eq v6, v1
v8 = band v5, v7
return v8
}
; run
function %swizzle() -> b1 {
block0:
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42]
v2 = swizzle.i8x16 v0, v1 ; reverse the lanes, with over-large index 42 using lane 0
v3 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
function %swizzle_with_overflow() -> b1 {
block0:
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
v2 = swizzle.i8x16 v0, v1 ; 250 should overflow but saturate so that the MSB is set (PSHUFB uses this to shuffle from lane 0)
v3 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
function %unpack_low() -> b1 {
block0:
v0 = vconst.i32x4 [0 1 2 3]
v1 = vconst.i32x4 [4 5 6 7]
v2 = x86_punpckl v0, v1
v3 = vconst.i32x4 [0 4 1 5]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
function %snarrow(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = snarrow v0, v1
return v2
}
; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]
function %unarrow(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = unarrow v0, v1
return v2
}
; run: %unarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 0 0xffff 4 5 0 0]
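; snarrow saturates each signed i32 into the signed i16 range (0x0001ffff -> 0x7fff), while
; unarrow clamps into the unsigned i16 range, so negative lanes such as -6 and 0xffffffff become 0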

View File

@@ -0,0 +1,211 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
; target s390x TODO: Not yet implemented on s390x
set enable_simd
target x86_64 machinst
;; shuffle
function %shuffle_different_ssa_values() -> i8x16 {
block0:
v0 = vconst.i8x16 0x00
v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1
return v2
}
; run: %shuffle_different_ssa_values() == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
function %shuffle_same_ssa_value() -> i8x16 {
block0:
v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax
v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes
return v1
}
; run: %shuffle_same_ssa_value() == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
function %shuffle_i32x4_in_same_place() -> i32x4 {
block0:
v1 = vconst.i32x4 [0 1 2 3]
v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
; keep each lane in place from the first vector
v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v4 = raw_bitcast.i32x4 v3
return v4
}
; run: %shuffle_i32x4_in_same_place() == [0 1 2 3]
function %shuffle_b32x4_to_all_true() -> i32x4 {
block0:
v1 = vconst.b32x4 [true false true false]
v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16
; pair up the true values to make the entire vector true
v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11]
v4 = raw_bitcast.i32x4 v3 ; TODO store.b32x4 is unavailable; see https://github.com/bytecodealliance/wasmtime/issues/2237
return v4
}
; run: %shuffle_b32x4_to_all_true() == [0xffffffff 0xffffffff 0xffffffff 0xffffffff]
;; swizzle
function %swizzle(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = swizzle.i8x16 v0, v1
return v2
}
; reverse the lanes, with over-large index 42 using lane 0
; run: %swizzle([0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15], [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42]) == [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
; 250 should overflow but saturate so that the MSB is set (PSHUFB uses this to shuffle from lane 0)
; run: %swizzle([0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15], [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0]) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
;; insertlane
function %insertlane_i8x16_first(i8x16, i8) -> i8x16 {
block0(v1: i8x16, v2: i8):
v3 = insertlane v1, v2, 0
return v3
}
; run: %insertlane_i8x16_first([0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], 0xff) == [0xff 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
function %insertlane_f32x4_second(f32x4, f32) -> f32x4 {
block0(v1: f32x4, v2: f32):
v3 = insertlane v1, v2, 1
return v3
}
; run: %insertlane_f32x4_second([0.0 0.0 0.0 0.0], 0x42.42) == [0.0 0x42.42 0.0 0.0]
function %insertlane_f64x2_first(f64x2, f64) -> f64x2 {
block0(v1: f64x2, v2: f64):
v3 = insertlane v1, v2, 0
return v3
}
; run: %insertlane_f64x2_first([0.0 0.0], 0x42.42) == [0x42.42 0.0]
function %insertlane_f64x2_second(f64x2, f64) -> f64x2 {
block0(v1: f64x2, v2: f64):
v3 = insertlane v1, v2, 1
return v3
}
; run: %insertlane_f64x2_second([0.0 0.0], 0x42.42) == [0.0 0x42.42]
;; extractlane
function %extractlane_b8x16() -> i8 {
block0:
v1 = vconst.b8x16 [false false false false false false false false false false true false false
false false false]
v2 = extractlane v1, 10
v3 = raw_bitcast.i8 v2
return v3
}
; run: %extractlane_b8x16() == 0xff
function %extractlane_i16x8_second(i16x8) -> i16 {
block0(v0: i16x8):
v1 = extractlane v0, 1
return v1
}
; run: %extractlane_i16x8_second(0x00080007000600050004000300020001) == 2
function %extractlane_f32x4_last(f32x4) -> f32 {
block0(v0: f32x4):
v1 = extractlane v0, 3
return v1
}
; run: %extractlane_f32x4_last([0x00.00 0x00.00 0x00.00 0x42.42]) == 0x42.42
function %extractlane_i32_with_vector_reuse() -> b1 {
block0:
v0 = iconst.i32 42
v1 = iconst.i32 99
v2 = vconst.i32x4 [42 42 42 42]
v3 = insertlane v2, v1, 2
v4 = extractlane v3, 3
v5 = icmp eq v4, v0
v6 = extractlane v3, 2
v7 = icmp eq v6, v1
v8 = band v5, v7
return v8
}
; run
function %extractlane_f32_with_vector_reuse() -> b1 {
block0:
v0 = f32const 0x42.42
v1 = f32const 0x99.99
v2 = vconst.f32x4 [0x42.42 0x42.42 0x42.42 0x42.42]
v3 = insertlane v2, v1, 2
v4 = extractlane v3, 3
v5 = fcmp eq v4, v0
v6 = extractlane v3, 2
v7 = fcmp eq v6, v1
v8 = band v5, v7
return v8
}
; run
;; splat
function %splat_i64x2() -> b1 {
block0:
v0 = iconst.i64 -1
v1 = splat.i64x2 v0
v2 = vconst.i64x2 [-1 -1]
v3 = icmp eq v1, v2
v8 = vall_true v3
return v8
}
; run
function %splat_i8(i8) -> i8x16 {
block0(v0: i8):
v1 = splat.i8x16 v0
return v1
}
; run: %splat_i8(0xff) == [0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff]
function %splat_i32(i32) -> i32x4 {
block0(v0: i32):
v1 = splat.i32x4 v0
return v1
}
; run: %splat_i32(42) == [42 42 42 42]
function %splat_f64(f64) -> f64x2 {
block0(v0: f64):
v1 = splat.f64x2 v0
return v1
}
; run: %splat_f64(-0x1.1) == [-0x1.1 -0x1.1]
; narrow
function %snarrow(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = snarrow v0, v1
return v2
}
; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]
function %unarrow(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = unarrow v0, v1
return v2
}
; run: %unarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 0 0xffff 4 5 0 0]

View File

@@ -0,0 +1,63 @@
test run
target aarch64
; target s390x TODO: Not yet implemented on s390x
set enable_simd
target x86_64 machinst
set enable_simd
target x86_64 legacy skylake
function %bnot() -> b32 {
block0:
v0 = vconst.b32x4 [true true true false]
v1 = bnot v0
v2 = extractlane v1, 3
return v2
}
; run
function %band_not() -> b1 {
block0:
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
v1 = vconst.i16x8 [0 0 0 0 0 0 0 0]
v2 = band_not v0, v1
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 1
return v4
}
; run
function %vany_true_i16x8() -> b1 {
block0:
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
v1 = vany_true v0
return v1
}
; run
function %vany_true_b32x4() -> b1 {
block0:
v0 = vconst.b32x4 [false false false false]
v1 = vany_true v0
v2 = bint.i32 v1
v3 = icmp_imm eq v2, 0
return v3
}
; run
function %vall_true_i16x8() -> b1 {
block0:
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
v1 = vall_true v0
v2 = bint.i32 v1
v3 = icmp_imm eq v2, 0
return v3
}
; run
function %vall_true_b32x4() -> b1 {
block0:
v0 = vconst.b32x4 [true true true true]
v1 = vall_true v0
return v1
}
; run

View File

@@ -0,0 +1,46 @@
test run
set enable_simd
target x86_64 legacy
function %vconst_syntax() -> b1 {
block0:
v0 = vconst.i32x4 0x00000004_00000003_00000002_00000001 ; build constant using hexadecimal syntax
v1 = vconst.i32x4 [1 2 3 4] ; build constant using literal list syntax
; verify lane 1 matches
v2 = extractlane v0, 1
v3 = extractlane v1, 1
v4 = icmp eq v3, v2
; verify lane 1 has the correct value
v5 = icmp_imm eq v3, 2
v6 = band v4, v5
return v6
}
; run
; Since both jump tables and constants are emitted after the function body, it is important that any RIP-relative
; addressing of constants is not incorrect in the presence of jump tables. This test confirms that, even when both
; jump tables and constants are emitted, the constant addressing works correctly.
function %vconst_with_jumptables() -> b1 {
jt0 = jump_table [block0]
block10:
v10 = iconst.i64 0
br_table v10, block1, jt0
block0:
v0 = iconst.i64 100
jump block11(v0)
block1:
v1 = iconst.i64 101
jump block11(v1)
block11(v11: i64):
v12 = icmp_imm eq v11, 100 ; We should have jumped through block 0.
v13 = vconst.i32x4 [1 2 3 4]
v14 = extractlane.i32x4 v13, 1 ; Extract the second element...
v15 = icmp_imm eq v14, 2 ; ...which should be the value 2.
v16 = band v12, v15
return v16
}
; run

View File

@@ -0,0 +1,40 @@
test run
; target s390x TODO: Not yet implemented on s390x
; target aarch64 TODO: Not yet implemented on aarch64
set enable_simd
target x86_64 machinst
set enable_simd
target x86_64 legacy
set enable_simd
target x86_64 legacy skylake
function %vconst_zeroes() -> b1 {
block0:
v0 = vconst.i8x16 0x00
v1 = extractlane v0, 4
v2 = icmp_imm eq v1, 0
return v2
}
; run
function %vconst_ones() -> b1 {
block0:
v0 = vconst.i8x16 0xffffffffffffffffffffffffffffffff
v1 = extractlane v0, 2
v2 = icmp_imm eq v1, 0xff
return v2
}
; run
function %splat_i64x2() -> b1 {
block0:
v0 = iconst.i64 -1
v1 = splat.i64x2 v0
v2 = vconst.i64x2 [-1 -1]
v3 = icmp eq v1, v2
v8 = vall_true v3
return v8
}
; run

View File

@@ -0,0 +1,47 @@
test run
; target s390x TODO: Not yet implemented on s390x
target aarch64
set enable_simd
target x86_64 machinst
set enable_simd
target x86_64 legacy haswell
function %vselect_i8x16() -> i8x16 {
block0:
v1 = vconst.b8x16 [false true false true false true true true true true false false false false false false]
v2 = vconst.i8x16 [100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115]
v3 = vconst.i8x16 [200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215]
v4 = vselect v1, v2, v3
return v4
}
; run: %vselect_i8x16() == [200 101 202 103 204 105 106 107 108 109 210 211 212 213 214 215]
function %vselect_i16x8() -> i16x8 {
block0:
v1 = vconst.b16x8 [false true false true false true true true]
v2 = vconst.i16x8 [100 101 102 103 104 105 106 107]
v3 = vconst.i16x8 [200 201 202 203 204 205 206 207]
v4 = vselect v1, v2, v3
return v4
}
; run: %vselect_i16x8() == [200 101 202 103 204 105 106 107]
function %vselect_i32x4() -> i32x4 {
block0:
v1 = vconst.b32x4 [false true false true]
v2 = vconst.i32x4 [100 101 102 103]
v3 = vconst.i32x4 [200 201 202 203]
v4 = vselect v1, v2, v3
return v4
}
; run: %vselect_i32x4() == [200 101 202 103]
function %vselect_i64x2() -> i64x2 {
block0:
v1 = vconst.b64x2 [false true]
v2 = vconst.i64x2 [100 101]
v3 = vconst.i64x2 [200 201]
v4 = vselect v1, v2, v3
return v4
}
; run: %vselect_i64x2() == [200 101]

View File

@@ -0,0 +1,40 @@
test run
target s390x
target aarch64
target x86_64 machinst
target x86_64 legacy
function %f(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i64 {
block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32, v9: i32, v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32):
v20 = iadd.i32 v0, v1
v21 = iadd.i32 v2, v3
v22 = iadd.i32 v4, v5
v23 = iadd.i32 v6, v7
v24 = iadd.i32 v8, v9
v25 = iadd.i32 v10, v11
v26 = iadd.i32 v12, v13
v27 = iadd.i32 v14, v15
v28 = iadd.i32 v16, v17
v29 = iadd.i32 v18, v19
v30 = iadd.i32 v20, v21
v31 = iadd.i32 v22, v23
v32 = iadd.i32 v24, v25
v33 = iadd.i32 v26, v27
v34 = iadd.i32 v28, v29
v35 = iadd.i32 v30, v31
v36 = iadd.i32 v32, v33
v37 = iadd.i32 v35, v34
v38 = iadd.i32 v36, v37
;; v38 should be zero (due to wrapping).
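;; (each i32 pair 0x80000000 + 0x80000000 wraps to 0, so the whole reduction tree sums to 0
;; and the final result is 1 + 0x80000000 + 0 == 0x80000001.)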
v39 = iconst.i64 1
v40 = uextend.i64 v0 ;; should be reloaded from a spillslot
v41 = uextend.i64 v38
v42 = iadd.i64 v39, v40
v43 = iadd.i64 v42, v41
return v43
}
; run: %f(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000) == 0x80000001