cranelift: Cleanup SIMD icmp tests (#5530)

* cranelift: Enable more SIMD tests

* cranelift: Reorganize icmp tests

* cranelift: Enable SIMD icmp tests for unsigned ops

* cranelift: Cleanup trailing newlines
Afonso Bordado
2023-01-05 17:19:03 +00:00
committed by GitHub
parent 7bfbec1b57
commit ee6a909ccb
16 changed files with 323 additions and 236 deletions

View File

@@ -1,208 +0,0 @@
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41
function %icmp_eq_i8x16() -> i8 {
block0:
v0 = vconst.i8x16 0x00
v1 = vconst.i8x16 0x00
v2 = icmp eq v0, v1
v3 = extractlane v2, 0
return v3
}
; run
function %icmp_eq_i64x2() -> i64 {
block0:
v0 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
v2 = icmp eq v0, v1
v3 = extractlane v2, 1
return v3
}
; run
function %icmp_ne_i32x4() -> i8 {
block0:
v0 = vconst.i32x4 [0 1 2 3]
v1 = vconst.i32x4 [7 7 7 7]
v2 = icmp ne v0, v1
v3 = vall_true v2
return v3
}
; run
function %icmp_ne_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [0 1 2 3 4 5 6 7]
v1 = vconst.i16x8 [0 1 2 3 4 5 6 7]
v2 = icmp ne v0, v1
v3 = vall_true v2
v4 = uextend.i32 v3
v5 = icmp_imm eq v4, 0
return v5
}
; run
function %icmp_sgt_i8x16() -> i8 {
block0:
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v2 = icmp sgt v0, v1
v3 = bitcast.i8x16 v2
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v7 = icmp eq v3, v4
v8 = vall_true v7
return v8
}
; run
function %icmp_sgt_i64x2() -> i8 {
block0:
v0 = vconst.i64x2 [0 -42]
v1 = vconst.i64x2 [-1 -43]
v2 = icmp sgt v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_ugt_i8x16() -> i8 {
block0:
v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
v1 = vconst.i8x16 [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
v2 = icmp ugt v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_sge_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [-1 1 2 3 4 5 6 7]
v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
v2 = icmp sge v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_uge_i32x4() -> i8 {
block0:
v0 = vconst.i32x4 [1 2 3 4]
v1 = vconst.i32x4 [1 1 1 1]
v2 = icmp uge v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_slt_i32x4() -> i8 {
block0:
v0 = vconst.i32x4 [-1 1 1 1]
v1 = vconst.i32x4 [1 2 3 4]
v2 = icmp slt v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_ult_i32x4() -> i8 {
block0:
v0 = vconst.i32x4 [1 1 1 1]
v1 = vconst.i32x4 [-1 2 3 4] ; -1 = 0xffff... will be greater than 1 when unsigned
v2 = icmp ult v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_ult_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
v2 = icmp ult v0, v1
v3 = vconst.i16x8 0x00
v4 = bitcast.i16x8 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
}
; run
function %icmp_sle_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]
v1 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
v2 = icmp sle v0, v1
v8 = vall_true v2
return v8
}
; run
function %icmp_ule_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
v1 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]
v2 = icmp ule v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_eq_f32x4() -> i8 {
block0:
v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0]
v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0]
v2 = fcmp eq v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_lt_f32x4() -> i8 {
block0:
v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0]
v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0]
v2 = fcmp lt v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_ge_f64x2() -> i8 {
block0:
v0 = vconst.f64x2 [0x0.0 0x4.2]
v1 = vconst.f64x2 [0.0 0x4.1]
v2 = fcmp ge v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_uno_f64x2() -> i8 {
block0:
v0 = vconst.f64x2 [0.0 NaN]
v1 = vconst.f64x2 [NaN 0x4.1]
v2 = fcmp uno v0, v1
v8 = vall_true v2
return v8
}
; run
function %fcmp_gt_nans_f32x4() -> i8 {
block0:
v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN]
v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf]
v2 = fcmp gt v0, v1
; now check that the result v2 is all zeroes
v3 = vconst.i32x4 0x00
v4 = bitcast.i32x4 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
}
; run

View File

@@ -0,0 +1,60 @@
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41
function %fcmp_eq_f32x4() -> i8 {
block0:
v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0]
v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0]
v2 = fcmp eq v0, v1
v8 = vall_true v2
return v8
}
; run: %fcmp_eq_f32x4() == 1
function %fcmp_lt_f32x4() -> i8 {
block0:
v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0]
v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0]
v2 = fcmp lt v0, v1
v8 = vall_true v2
return v8
}
; run: %fcmp_lt_f32x4() == 1
function %fcmp_ge_f64x2() -> i8 {
block0:
v0 = vconst.f64x2 [0x0.0 0x4.2]
v1 = vconst.f64x2 [0.0 0x4.1]
v2 = fcmp ge v0, v1
v8 = vall_true v2
return v8
}
; run: %fcmp_ge_f64x2() == 1
function %fcmp_uno_f64x2() -> i8 {
block0:
v0 = vconst.f64x2 [0.0 NaN]
v1 = vconst.f64x2 [NaN 0x4.1]
v2 = fcmp uno v0, v1
v8 = vall_true v2
return v8
}
; run: %fcmp_uno_f64x2() == 1
function %fcmp_gt_nans_f32x4() -> i8 {
block0:
v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN]
v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf]
v2 = fcmp gt v0, v1
; now check that the result v2 is all zeroes
v3 = vconst.i32x4 0x00
v4 = bitcast.i32x4 v2
v5 = icmp eq v3, v4
v8 = vall_true v5
return v8
}
; run: %fcmp_gt_nans_f32x4() == 1
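For contrast with the uno (unordered) test above, the ord condition is its complement: a lane is true only when neither input is NaN. An illustrative extra case, not part of this commit (%fcmp_ord_f64x2 is an invented name):

function %fcmp_ord_f64x2() -> i8 {
block0:
v0 = vconst.f64x2 [0.0 0x4.2]
v1 = vconst.f64x2 [0x1.0 0x4.1]
; no NaN in either operand, so every lane of the ord result is true
v2 = fcmp ord v0, v1
v8 = vall_true v2
return v8
}
; run: %fcmp_ord_f64x2() == 1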

View File

@@ -1,4 +1,8 @@
test interpret
test run
target x86_64
target aarch64
target s390x
function %simd_icmp_eq_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -28,3 +32,25 @@ block0(v0: i64x2, v1: i64x2):
}
; run: %simd_icmp_eq_i64([10 0], [1 0]) == [0 -1]
; run: %simd_icmp_eq_i64([-1 1], [-1 0]) == [-1 0]
function %icmp_eq_const_i8x16() -> i8 {
block0:
v0 = vconst.i8x16 0x00
v1 = vconst.i8x16 0x00
v2 = icmp eq v0, v1
v3 = extractlane v2, 0
return v3
}
; run: %icmp_eq_const_i8x16() == -1
function %icmp_eq_const_i64x2() -> i64 {
block0:
v0 = vconst.i64x2 0xf1ffffffffffffffffffffffffffffff
v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
v2 = icmp eq v0, v1
v3 = extractlane v2, 1
return v3
}
; run: %icmp_eq_const_i64x2() == 0

View File

@@ -1,4 +1,8 @@
test interpret
test run
target x86_64
target aarch64
target s390x
function %simd_icmp_ne_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -28,3 +32,27 @@ block0(v0: i64x2, v1: i64x2):
}
; run: %simd_icmp_ne_i64([10 0], [1 0]) == [-1 0]
; run: %simd_icmp_ne_i64([-1 1], [-1 0]) == [0 -1]
function %icmp_ne_const_i32x4() -> i8 {
block0:
v0 = vconst.i32x4 [0 1 2 3]
v1 = vconst.i32x4 [7 7 7 7]
v2 = icmp ne v0, v1
v3 = vall_true v2
return v3
}
; run: %icmp_ne_const_i32x4() == 1
function %icmp_ne_const_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [0 1 2 3 4 5 6 7]
v1 = vconst.i16x8 [0 1 2 3 4 5 6 7]
v2 = icmp ne v0, v1
v3 = vall_true v2
v4 = uextend.i32 v3
v5 = icmp_imm eq v4, 0
return v5
}
; run: %icmp_ne_const_i16x8() == 1

View File

@@ -1,4 +1,8 @@
test interpret
test run
target x86_64
target aarch64
target s390x
function %simd_icmp_sge_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -31,3 +35,14 @@ block0(v0: i64x2, v1: i64x2):
; run: %simd_icmp_sge_i64([-1 0], [-1 1]) == [-1 0]
; run: %simd_icmp_sge_i64([-5 1], [-1 1]) == [0 -1]
; run: %simd_icmp_sge_i64([0 0], [0 0]) == [-1 -1]
function %icmp_sge_const_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [-1 1 2 3 4 5 6 7]
v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
v2 = icmp sge v0, v1
v8 = vall_true v2
return v8
}
; run: %icmp_sge_const_i16x8() == 1

View File

@@ -1,4 +1,8 @@
test interpret
test run
target x86_64
target aarch64
target s390x
function %simd_icmp_sgt_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -33,3 +37,28 @@ block0(v0: i64x2, v1: i64x2):
; run: %simd_icmp_sgt_i64([-1 0], [-1 1]) == [0 0]
; run: %simd_icmp_sgt_i64([-5 1], [-1 -1]) == [0 -1]
; run: %simd_icmp_sgt_i64([0 0], [0 0]) == [0 0]
function %icmp_sgt_const_i8x16() -> i8 {
block0:
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v2 = icmp sgt v0, v1
v3 = bitcast.i8x16 v2
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v7 = icmp eq v3, v4
v8 = vall_true v7
return v8
}
; run: %icmp_sgt_const_i8x16() == 1
function %icmp_sgt_const_i64x2() -> i8 {
block0:
v0 = vconst.i64x2 [0 -42]
v1 = vconst.i64x2 [-1 -43]
v2 = icmp sgt v0, v1
v8 = vall_true v2
return v8
}
; run: %icmp_sgt_const_i64x2() == 1

View File

@@ -1,4 +1,8 @@
test interpret
test run
target x86_64
target aarch64
target s390x
function %simd_icmp_sle_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -33,3 +37,15 @@ block0(v0: i64x2, v1: i64x2):
; run: %simd_icmp_sle_i64([-1 0], [-1 1]) == [-1 -1]
; run: %simd_icmp_sle_i64([-5 1], [-1 -1]) == [-1 0]
; run: %simd_icmp_sle_i64([0 0], [0 0]) == [-1 -1]
function %icmp_sle_const_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]
v1 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
v2 = icmp sle v0, v1
v8 = vall_true v2
return v8
}
; run: %icmp_sle_const_i16x8() == 1

View File

@@ -1,4 +1,8 @@
test interpret
test run
target x86_64
target aarch64
target s390x
function %simd_icmp_slt_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -31,3 +35,14 @@ block0(v0: i64x2, v1: i64x2):
; run: %simd_icmp_slt_i64([-1 0], [-1 1]) == [0 -1]
; run: %simd_icmp_slt_i64([-5 1], [-1 1]) == [-1 0]
; run: %simd_icmp_slt_i64([0 0], [0 0]) == [0 0]
function %icmp_slt_const_i32x4() -> i8 {
block0:
v0 = vconst.i32x4 [-1 1 1 1]
v1 = vconst.i32x4 [1 2 3 4]
v2 = icmp slt v0, v1
v8 = vall_true v2
return v8
}
; run: %icmp_slt_const_i32x4() == 1

View File

@@ -0,0 +1,17 @@
test interpret
test run
target aarch64
target s390x
; TODO: Move this to the main file once x86_64 supports this operation
; See: #5529
function %simd_icmp_uge_i64(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp uge v0, v1
return v2
}
; run: %simd_icmp_uge_i64([0 1], [0 0]) == [-1 -1]
; run: %simd_icmp_uge_i64([-1 0], [-1 1]) == [-1 0]
; run: %simd_icmp_uge_i64([-5 1], [-1 -1]) == [0 0]
; run: %simd_icmp_uge_i64([0 0], [0 0]) == [-1 -1]
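Aside, not part of this commit: until #5529 lands, an unsigned i64x2 compare can in principle be emulated on any backend by XOR-ing the sign bit into both operands and then doing a signed compare (x uge y iff (x xor 0x80..00) sge (y xor 0x80..00)). A hypothetical test sketching that identity (%emulated_icmp_uge_i64 is an invented name); the same trick applies to the ugt/ule/ult variants in the files below:

function %emulated_icmp_uge_i64(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
; flip the sign bit of each lane; a signed compare then yields the unsigned result
v2 = vconst.i64x2 0x80000000000000008000000000000000
v3 = bxor v0, v2
v4 = bxor v1, v2
v5 = icmp sge v3, v4
return v5
}
; run: %emulated_icmp_uge_i64([0 1], [0 0]) == [-1 -1]
; run: %emulated_icmp_uge_i64([-5 1], [-1 -1]) == [0 0]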

View File

@@ -1,4 +1,8 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %simd_icmp_uge_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -22,12 +26,14 @@ block0(v0: i32x4, v1: i32x4):
 ; run: %simd_icmp_uge_i32([0 1 -1 0], [0 0 -1 1]) == [-1 -1 -1 0]
 ; run: %simd_icmp_uge_i32([-5 1 0 0], [-1 -1 0 0]) == [0 0 -1 -1]
-function %simd_icmp_uge_i64(i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2, v1: i64x2):
+function %icmp_uge_const_i32x4() -> i8 {
+block0:
+v0 = vconst.i32x4 [1 2 3 4]
+v1 = vconst.i32x4 [1 1 1 1]
 v2 = icmp uge v0, v1
-return v2
+v8 = vall_true v2
+return v8
 }
-; run: %simd_icmp_uge_i64([0 1], [0 0]) == [-1 -1]
-; run: %simd_icmp_uge_i64([-1 0], [-1 1]) == [-1 0]
-; run: %simd_icmp_uge_i64([-5 1], [-1 -1]) == [0 0]
-; run: %simd_icmp_uge_i64([0 0], [0 0]) == [-1 -1]
+; run: %icmp_uge_const_i32x4() == 1

View File

@@ -0,0 +1,17 @@
test interpret
test run
target aarch64
target s390x
; TODO: Move this to the main file once x86_64 supports this operation
; See: #5529
function %simd_icmp_ugt_i64(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ugt v0, v1
return v2
}
; run: %simd_icmp_ugt_i64([0 1], [0 0]) == [0 -1]
; run: %simd_icmp_ugt_i64([-1 0], [-1 1]) == [0 0]
; run: %simd_icmp_ugt_i64([-5 1], [-1 -1]) == [0 0]
; run: %simd_icmp_ugt_i64([0 0], [0 0]) == [0 0]

View File

@@ -1,4 +1,8 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %simd_icmp_ugt_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -22,12 +26,13 @@ block0(v0: i32x4, v1: i32x4):
 ; run: %simd_icmp_ugt_i32([0 1 -1 0], [0 0 -1 1]) == [0 -1 0 0]
 ; run: %simd_icmp_ugt_i32([-5 1 0 0], [-1 -1 0 0]) == [0 0 0 0]
-function %simd_icmp_ugt_i64(i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2, v1: i64x2):
+function %icmp_ugt_const_i8x16() -> i8 {
+block0:
+v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
+v1 = vconst.i8x16 [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
 v2 = icmp ugt v0, v1
-return v2
+v8 = vall_true v2
+return v8
 }
-; run: %simd_icmp_ugt_i64([0 1], [0 0]) == [0 -1]
-; run: %simd_icmp_ugt_i64([-1 0], [-1 1]) == [0 0]
-; run: %simd_icmp_ugt_i64([-5 1], [-1 -1]) == [0 0]
-; run: %simd_icmp_ugt_i64([0 0], [0 0]) == [0 0]
+; run: %icmp_ugt_const_i8x16() == 1

View File

@@ -0,0 +1,17 @@
test interpret
test run
target aarch64
target s390x
; TODO: Move this to the main file once x86_64 supports this operation
; See: #5529
function %simd_icmp_ule_i64(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ule v0, v1
return v2
}
; run: %simd_icmp_ule_i64([0 1], [0 0]) == [-1 0]
; run: %simd_icmp_ule_i64([-1 0], [-1 1]) == [-1 -1]
; run: %simd_icmp_ule_i64([-5 1], [-1 -1]) == [-1 -1]
; run: %simd_icmp_ule_i64([0 0], [0 0]) == [-1 -1]

View File

@@ -1,4 +1,8 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %simd_icmp_ule_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -22,12 +26,15 @@ block0(v0: i32x4, v1: i32x4):
 ; run: %simd_icmp_ule_i32([0 1 -1 0], [0 0 -1 1]) == [-1 0 -1 -1]
 ; run: %simd_icmp_ule_i32([-5 1 0 0], [-1 -1 0 0]) == [-1 -1 -1 -1]
-function %simd_icmp_ule_i64(i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2, v1: i64x2):
+function %icmp_ule_const_i16x8() -> i8 {
+block0:
+v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
+v1 = vconst.i16x8 [-1 -1 0 0 0 0 0 0]
 v2 = icmp ule v0, v1
-return v2
+v8 = vall_true v2
+return v8
 }
-; run: %simd_icmp_ule_i64([0 1], [0 0]) == [-1 0]
-; run: %simd_icmp_ule_i64([-1 0], [-1 1]) == [-1 -1]
-; run: %simd_icmp_ule_i64([-5 1], [-1 -1]) == [-1 -1]
-; run: %simd_icmp_ule_i64([0 0], [0 0]) == [-1 -1]
+; run: %icmp_ule_const_i16x8() == 1

View File

@@ -0,0 +1,17 @@
test interpret
test run
target aarch64
target s390x
; TODO: Move this to the main file once x86_64 supports this operation
; See: #5529
function %simd_icmp_ult_i64(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = icmp ult v0, v1
return v2
}
; run: %simd_icmp_ult_i64([0 1], [0 0]) == [0 0]
; run: %simd_icmp_ult_i64([-1 0], [-1 1]) == [0 -1]
; run: %simd_icmp_ult_i64([-5 1], [-1 -1]) == [-1 -1]
; run: %simd_icmp_ult_i64([0 0], [0 0]) == [0 0]

View File

@@ -1,4 +1,8 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %simd_icmp_ult_i8(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
@@ -22,12 +26,28 @@ block0(v0: i32x4, v1: i32x4):
 ; run: %simd_icmp_ult_i32([0 1 -1 0], [0 0 -1 1]) == [0 0 0 -1]
 ; run: %simd_icmp_ult_i32([-5 1 0 0], [-1 -1 0 0]) == [-1 -1 0 0]
-function %simd_icmp_ult_i64(i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2, v1: i64x2):
+function %icmp_ult_const_i32x4() -> i8 {
+block0:
+v0 = vconst.i32x4 [1 1 1 1]
+v1 = vconst.i32x4 [-1 2 3 4] ; -1 = 0xffff... will be greater than 1 when unsigned
 v2 = icmp ult v0, v1
-return v2
+v8 = vall_true v2
+return v8
 }
-; run: %simd_icmp_ult_i64([0 1], [0 0]) == [0 0]
-; run: %simd_icmp_ult_i64([-1 0], [-1 1]) == [0 -1]
-; run: %simd_icmp_ult_i64([-5 1], [-1 -1]) == [-1 -1]
-; run: %simd_icmp_ult_i64([0 0], [0 0]) == [0 0]
+; run: %icmp_ult_const_i32x4() == 1
+function %icmp_ult_const_i16x8() -> i8 {
+block0:
+v0 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
+v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
+v2 = icmp ult v0, v1
+v3 = vconst.i16x8 0x00
+v4 = bitcast.i16x8 v2
+v5 = icmp eq v3, v4
+v8 = vall_true v5
+return v8
+}
+; run: %icmp_ult_const_i16x8() == 1
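An equivalent way to write the all-lanes-false check above is vany_true on the comparison result, mirroring the uextend/icmp_imm pattern used elsewhere in these tests. An illustrative variant, not part of this commit (%icmp_ult_none_i16x8 is an invented name):

function %icmp_ult_none_i16x8() -> i8 {
block0:
v0 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1]
; a value is never unsigned-less-than itself, so v2 is all zeroes
v2 = icmp ult v0, v1
v3 = vany_true v2
v4 = uextend.i32 v3
v5 = icmp_imm eq v4, 0
return v5
}
; run: %icmp_ult_none_i16x8() == 1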