diff --git a/cranelift/filetests/filetests/isa/x86/extractlane-binemit.clif b/cranelift/filetests/filetests/isa/x86/extractlane-binemit.clif deleted file mode 100644 index 84140a23bd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/extractlane-binemit.clif +++ /dev/null @@ -1,38 +0,0 @@ -test binemit -set enable_simd -target x86_64 haswell - -; for extractlane, floats are legalized differently than integers and booleans; integers and -; booleans use x86_pextr which is manually placed in the IR so that it can be binemit-tested - -function %test_extractlane_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %xmm0] v1 = splat.b8x16 v0 -[-, %rax] v2 = x86_pextr v1, 10 ; bin: 66 0f 3a 14 c0 0a - return -} - -function %test_extractlane_i16() { -block0: -[-, %rax] v0 = iconst.i16 4 -[-, %xmm1] v1 = splat.i16x8 v0 -[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04 - return -} - -function %test_extractlane_i32() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %xmm4] v1 = splat.i32x4 v0 -[-, %rcx] v2 = x86_pextr v1, 2 ; bin: 66 0f 3a 16 e1 02 - return -} - -function %test_extractlane_b64() { -block0: -[-, %rax] v0 = bconst.b64 false -[-, %xmm2] v1 = splat.b64x2 v0 -[-, %rbx] v2 = x86_pextr v1, 1 ; bin: 66 48 0f 3a 16 d3 01 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/extractlane-run.clif b/cranelift/filetests/filetests/isa/x86/extractlane-run.clif deleted file mode 100644 index 4e1d735bfe..0000000000 --- a/cranelift/filetests/filetests/isa/x86/extractlane-run.clif +++ /dev/null @@ -1,68 +0,0 @@ -test run -set enable_simd - -function %test_extractlane_b8() -> b8 { -block0: - v1 = vconst.b8x16 [false false false false false false false false false false true false false - false false false] - v2 = extractlane v1, 10 - return v2 -} -; run - -function %test_extractlane_i16() -> b1 { -block0: - v0 = vconst.i16x8 0x00080007000600050004000300020001 - v1 = extractlane v0, 1 - v2 = icmp_imm eq v1, 2 - return v2 -} -; run - -function %test_extractlane_f32() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42] - v2 = extractlane v1, 3 - v3 = fcmp eq v2, v0 - return v3 -} -; run - -function %test_extractlane_i32_with_vector_reuse() -> b1 { -block0: - v0 = iconst.i32 42 - v1 = iconst.i32 99 - - v2 = splat.i32x4 v0 - v3 = insertlane v2, 2, v1 - - v4 = extractlane v3, 3 - v5 = icmp eq v4, v0 - - v6 = extractlane v3, 2 - v7 = icmp eq v6, v1 - - v8 = band v5, v7 - return v8 -} -; run - -function %test_extractlane_f32_with_vector_reuse() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = f32const 0x99.99 - - v2 = splat.f32x4 v0 - v3 = insertlane v2, 2, v1 - - v4 = extractlane v3, 3 - v5 = fcmp eq v4, v0 - - v6 = extractlane v3, 2 - v7 = fcmp eq v6, v1 - - v8 = band v5, v7 - return v8 -} -; run diff --git a/cranelift/filetests/filetests/isa/x86/icmp-compile.clif b/cranelift/filetests/filetests/isa/x86/icmp-compile.clif deleted file mode 100644 index 4a4ac0fc59..0000000000 --- a/cranelift/filetests/filetests/isa/x86/icmp-compile.clif +++ /dev/null @@ -1,35 +0,0 @@ -test binemit -set enable_simd -target x86_64 skylake - -function %icmp_i8x16() { -block0: -[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db -[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4 -[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc - return -} - -function %icmp_i16x8() { -block0: -[-, %xmm0] v0 = vconst.i16x8 0x00 -[-, %xmm7] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 75 c7 - return -} - -function %icmp_i32x4() { -block0: -[-, %xmm0] v0 = vconst.i32x4 0x00 -[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4 - return -} - -function %icmp_i64x2() { -block0: -[-, %xmm0] v0 = vconst.i64x2 0x00 -[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 38 29 c1 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/icmp-run.clif b/cranelift/filetests/filetests/isa/x86/icmp-run.clif deleted file mode 100644 index 0820cac013..0000000000 --- a/cranelift/filetests/filetests/isa/x86/icmp-run.clif +++ /dev/null @@ -1,24 +0,0 @@ -test run -set enable_simd - -function %run_icmp_i8x16() -> b8 { -block0: - v0 = vconst.i8x16 0x00 - v1 = vconst.i8x16 0x00 - v2 = icmp eq v0, v1 - v3 = extractlane v2, 0 - return v3 -} - -; run - -function %run_icmp_i64x2() -> b64 { -block0: - v0 = vconst.i64x2 0xffffffffffffffffffffffffffffffff - v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff - v2 = icmp eq v0, v1 - v3 = extractlane v2, 1 - return v3 -} - -; run diff --git a/cranelift/filetests/filetests/isa/x86/insertlane-binemit.clif b/cranelift/filetests/filetests/isa/x86/insertlane-binemit.clif deleted file mode 100644 index 4be35a47b3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/insertlane-binemit.clif +++ /dev/null @@ -1,42 +0,0 @@ -test binemit -set enable_simd -target x86_64 haswell - -; for insertlane, floats are legalized differently than integers and booleans; integers and -; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested - -function %test_insertlane_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %rbx] v1 = bconst.b8 false -[-, %xmm0] v2 = splat.b8x16 v0 -[-, %xmm0] v3 = x86_pinsr v2, 10, v1 ; bin: 66 0f 3a 20 c3 0a - return -} - -function %test_insertlane_i16() { -block0: -[-, %rax] v0 = iconst.i16 4 -[-, %rbx] v1 = iconst.i16 5 -[-, %xmm1] v2 = splat.i16x8 v0 -[-, %xmm1] v3 = x86_pinsr v2, 4, v1 ; bin: 66 0f c4 cb 04 - return -} - -function %test_insertlane_i32() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %rbx] v1 = iconst.i32 99 -[-, %xmm4] v2 = splat.i32x4 v0 -[-, %xmm4] v3 = x86_pinsr v2, 2, v1 ; bin: 66 0f 3a 22 e3 02 - return -} - -function %test_insertlane_b64() { -block0: -[-, %rax] v0 = bconst.b64 true -[-, %rbx] v1 = bconst.b64 false -[-, %xmm2] v2 = splat.b64x2 v0 -[-, %xmm2] v3 = x86_pinsr v2, 1, v1 ; bin: 66 48 0f 3a 22 d3 01 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/insertlane-run.clif b/cranelift/filetests/filetests/isa/x86/insertlane-run.clif deleted file mode 100644 index 8f1cd7ef46..0000000000 --- a/cranelift/filetests/filetests/isa/x86/insertlane-run.clif +++ /dev/null @@ -1,48 +0,0 @@ -test run -set enable_simd - -; TODO once SIMD vector comparison is implemented, remove use of extractlane below - -function %test_insertlane_b8() -> b8 { -block0: - v1 = bconst.b8 true - v2 = vconst.b8x16 [false false false false false false false false false false false false false - false false false] - v3 = insertlane v2, 10, v1 - v4 = extractlane v3, 10 - return v4 -} -; run - -function %test_insertlane_f32() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = vconst.f32x4 0x00 - v2 = insertlane v1, 1, v0 - v3 = extractlane v2, 1 - v4 = fcmp eq v3, v0 - return v4 -} -; run - -function %test_insertlane_f64_lane1() -> b1 { -block0: - v0 = f64const 0x42.42 - v1 = vconst.f64x2 0x00 - v2 = insertlane v1, 1, v0 - v3 = extractlane v2, 1 - v4 = fcmp eq v3, v0 - return v4 -} -; run - -function %test_insertlane_f64_lane0() -> b1 { -block0: - v0 = f64const 0x42.42 - v1 = vconst.f64x2 0x00 - v2 = insertlane v1, 0, v0 - v3 = extractlane v2, 0 - v4 = fcmp eq v3, v0 - return v4 -} -; run diff --git a/cranelift/filetests/filetests/isa/x86/legalize-splat.clif b/cranelift/filetests/filetests/isa/x86/legalize-splat.clif deleted file mode 100644 index 2fa6ace7e9..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-splat.clif +++ /dev/null @@ -1,72 +0,0 @@ -test compile -set enable_simd=true -set enable_probestack=false -target x86_64 haswell - -; use baldrdash_system_v calling convention here for simplicity (avoids prologue, epilogue) -function %test_splat_i32() -> i32x4 baldrdash_system_v { -block0: - v0 = iconst.i32 42 - v1 = splat.i32x4 v0 - return v1 -} - -; sameln: function %test_splat_i32() -> i32x4 [%xmm0] baldrdash_system_v { -; nextln: ss0 = incoming_arg 0, offset 0 -; nextln: -; nextln: block0: -; nextln: v0 = iconst.i32 42 -; nextln: v2 = scalar_to_vector.i32x4 v0 -; nextln: v1 = x86_pshufd v2, 0 -; nextln: return v1 -; nextln: } - - - -function %test_splat_i64() -> i64x2 baldrdash_system_v { -block0: - v0 = iconst.i64 42 - v1 = splat.i64x2 v0 - return v1 -} - -; check: block0: -; nextln: v0 = iconst.i64 42 -; nextln: v2 = scalar_to_vector.i64x2 v0 -; nextln: v1 = x86_pinsr v2, 1, v0 -; nextln: return v1 - - - -function %test_splat_b16() -> b16x8 baldrdash_system_v { -block0: - v0 = bconst.b16 true - v1 = splat.b16x8 v0 - return v1 -} - -; check: block0: -; nextln: v0 = bconst.b16 true -; nextln: v2 = scalar_to_vector.b16x8 v0 -; nextln: v3 = x86_pinsr v2, 1, v0 -; nextln: v4 = raw_bitcast.i32x4 v3 -; nextln: v5 = x86_pshufd v4, 0 -; nextln: v1 = raw_bitcast.b16x8 v5 -; nextln: return v1 - - - -function %test_splat_i8() -> i8x16 baldrdash_system_v { -block0: - v0 = iconst.i8 42 - v1 = splat.i8x16 v0 - return v1 -} - -; check: block0: -; nextln: v2 = iconst.i32 42 -; nextln: v0 = ireduce.i8 v2 -; nextln: v3 = scalar_to_vector.i8x16 v0 -; nextln: v4 = vconst.i8x16 0x00 -; nextln: v1 = x86_pshufb v3, v4 -; nextln: return v1 diff --git a/cranelift/filetests/filetests/isa/x86/pshufb.clif b/cranelift/filetests/filetests/isa/x86/pshufb.clif deleted file mode 100644 index c9d5d798d9..0000000000 --- a/cranelift/filetests/filetests/isa/x86/pshufb.clif +++ /dev/null @@ -1,13 +0,0 @@ -test binemit -set enable_simd -target x86_64 has_ssse3=true - -function %test_pshufb() { -block0: -[-, %rax] v0 = iconst.i8 42 -[-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 40 0f 6e c0 -[-, %rbx] v2 = iconst.i8 43 -[-, %xmm4] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 40 0f 6e e3 -[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 0f 38 00 c4 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/pshufd.clif b/cranelift/filetests/filetests/isa/x86/pshufd.clif deleted file mode 100644 index 69dc3f4ea0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/pshufd.clif +++ /dev/null @@ -1,11 +0,0 @@ -test binemit -set enable_simd -target x86_64 - -function %test_pshuf() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 40 0f 6e c0 -[-, %xmm0] v2 = x86_pshufd v1, 0 ; bin: 66 0f 70 c0 00 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif b/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif deleted file mode 100644 index 78c6bfef40..0000000000 --- a/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif +++ /dev/null @@ -1,31 +0,0 @@ -test legalizer -set enable_simd -target x86_64 skylake - -function %test_shuffle_different_ssa_values() -> i8x16 { -block0: - v0 = vconst.i8x16 0x00 - v1 = vconst.i8x16 0x01 - v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0 - return v2 -} - -; check: v1 = vconst.i8x16 0x01 -; nextln: v3 = vconst.i8x16 0x80000000000000000000000000000000 -; nextln: v4 = x86_pshufb v0, v3 -; nextln: v5 = vconst.i8x16 0x01808080808080808080808080808080 -; nextln: v6 = x86_pshufb v1, v5 -; nextln: v2 = bor v4, v6 - - - -function %test_shuffle_same_ssa_value() -> i8x16 { -block0: - v1 = vconst.i8x16 0x01 - v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1 - return v2 -} - -; check: v1 = vconst.i8x16 0x01 -; nextln: v3 = vconst.i8x16 0x03000000000000000000000000000000 -; nextln: v2 = x86_pshufb v1, v3 diff --git a/cranelift/filetests/filetests/isa/x86/shuffle-run.clif b/cranelift/filetests/filetests/isa/x86/shuffle-run.clif deleted file mode 100644 index 44e4998907..0000000000 --- a/cranelift/filetests/filetests/isa/x86/shuffle-run.clif +++ /dev/null @@ -1,60 +0,0 @@ -test run -set enable_simd - -function %test_shuffle_different_ssa_values() -> b1 { -block0: - v0 = vconst.i8x16 0x00 - v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] - v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1 - v3 = extractlane.i8x16 v2, 15 - v4 = iconst.i8 42 - v5 = icmp eq v3, v4 - return v5 -} - -; run - -function %test_shuffle_same_ssa_value() -> b1 { -block0: - v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax - v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes - v2 = extractlane.i8x16 v1, 4 - v3 = iconst.i8 0x01 - v4 = icmp eq v2, v3 - return v4 -} - -; run - -function %compare_shuffle() -> b1 { -block0: - v1 = vconst.i32x4 [0 1 2 3] - v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 - ; keep each lane in place from the first vector - v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v4 = raw_bitcast.i32x4 v3 - v5 = extractlane.i32x4 v4, 3 - v6 = icmp_imm eq v5, 3 - v7 = extractlane.i32x4 v4, 0 - v8 = icmp_imm eq v7, 0 - v9 = band v6, v8 - return v9 -} - -; run - - -function %compare_shuffle() -> b32 { -block0: - v1 = vconst.b32x4 [true false true false] - v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 - ; pair up the true values to make the entire vector true - v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11] - v4 = raw_bitcast.b32x4 v3 - v5 = extractlane v4, 3 - v6 = extractlane v4, 0 - v7 = band v5, v6 - return v7 -} - -; run diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif index 053b50a9f3..c5463491fe 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif @@ -2,6 +2,38 @@ test binemit set enable_simd target x86_64 skylake +function %icmp_i8x16() { +block0: +[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db +[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4 +[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc + return +} + +function %icmp_i16x8() { +block0: +[-, %xmm0] v0 = vconst.i16x8 0x00 +[-, %xmm7] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff +[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 75 c7 + return +} + +function %icmp_i32x4() { +block0: +[-, %xmm0] v0 = vconst.i32x4 0x00 +[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff +[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4 + return +} + +function %icmp_i64x2() { +block0: +[-, %xmm0] v0 = vconst.i64x2 0x00 +[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff +[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 38 29 c1 + return +} + function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 { block0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]): [-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1 diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif index 7cbd285860..dde38ba936 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif @@ -1,6 +1,5 @@ test run set enable_simd -target x86_64 skylake function %icmp_eq_i8x16() -> b8 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/raw_bitcast.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif similarity index 83% rename from cranelift/filetests/filetests/isa/x86/raw_bitcast.clif rename to cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif index 717f655ec6..71bb3f048c 100644 --- a/cranelift/filetests/filetests/isa/x86/raw_bitcast.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif @@ -1,6 +1,7 @@ test binemit target x86_64 +; Ensure raw_bitcast emits no instructions. function %test_raw_bitcast_i16x8_to_b32x4() { block0: [-, %rbx] v0 = bconst.b16 true diff --git a/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif similarity index 87% rename from cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif rename to cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif index 4a02e6bac6..a23a716f90 100644 --- a/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif @@ -3,6 +3,8 @@ set opt_level=speed_and_size set enable_simd target x86_64 +;; These scalar_to_vector tests avoid the use of REX prefixes with the speed_and_size optimization flag. + function %test_scalar_to_vector_b8() { block0: [-, %rax] v0 = bconst.b8 true diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif new file mode 100644 index 0000000000..88c71eb9d1 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif @@ -0,0 +1,97 @@ +test binemit +set enable_simd +target x86_64 haswell + +; for insertlane, floats are legalized differently than integers and booleans; integers and +; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested + +function %test_insertlane_b8() { +block0: +[-, %rax] v0 = bconst.b8 true +[-, %rbx] v1 = bconst.b8 false +[-, %xmm0] v2 = splat.b8x16 v0 +[-, %xmm0] v3 = x86_pinsr v2, 10, v1 ; bin: 66 0f 3a 20 c3 0a + return +} + +function %test_insertlane_i16() { +block0: +[-, %rax] v0 = iconst.i16 4 +[-, %rbx] v1 = iconst.i16 5 +[-, %xmm1] v2 = splat.i16x8 v0 +[-, %xmm1] v3 = x86_pinsr v2, 4, v1 ; bin: 66 0f c4 cb 04 + return +} + +function %test_insertlane_i32() { +block0: +[-, %rax] v0 = iconst.i32 42 +[-, %rbx] v1 = iconst.i32 99 +[-, %xmm4] v2 = splat.i32x4 v0 +[-, %xmm4] v3 = x86_pinsr v2, 2, v1 ; bin: 66 0f 3a 22 e3 02 + return +} + +function %test_insertlane_b64() { +block0: +[-, %rax] v0 = bconst.b64 true +[-, %rbx] v1 = bconst.b64 false +[-, %xmm2] v2 = splat.b64x2 v0 +[-, %xmm2] v3 = x86_pinsr v2, 1, v1 ; bin: 66 48 0f 3a 22 d3 01 + return +} + +; for extractlane, floats are legalized differently than integers and booleans; integers and +; booleans use x86_pextr which is manually placed in the IR so that it can be binemit-tested + +function %test_extractlane_b8() { +block0: +[-, %rax] v0 = bconst.b8 true +[-, %xmm0] v1 = splat.b8x16 v0 +[-, %rax] v2 = x86_pextr v1, 10 ; bin: 66 0f 3a 14 c0 0a + return +} + +function %test_extractlane_i16() { +block0: +[-, %rax] v0 = iconst.i16 4 +[-, %xmm1] v1 = splat.i16x8 v0 +[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04 + return +} + +function %test_extractlane_i32() { +block0: +[-, %rax] v0 = iconst.i32 42 +[-, %xmm4] v1 = splat.i32x4 v0 +[-, %rcx] v2 = x86_pextr v1, 2 ; bin: 66 0f 3a 16 e1 02 + return +} + +function %test_extractlane_b64() { +block0: +[-, %rax] v0 = bconst.b64 false +[-, %xmm2] v1 = splat.b64x2 v0 +[-, %rbx] v2 = x86_pextr v1, 1 ; bin: 66 48 0f 3a 16 d3 01 + return +} + +;; shuffle + +function %test_pshufd() { +block0: +[-, %rax] v0 = iconst.i32 42 +[-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 40 0f 6e c0 +[-, %xmm0] v2 = x86_pshufd v1, 0 ; bin: 66 0f 70 c0 00 + return +} + +function %test_pshufb() { +block0: +[-, %rax] v0 = iconst.i8 42 +[-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 40 0f 6e c0 +[-, %rbx] v2 = iconst.i8 43 +[-, %xmm4] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 40 0f 6e e3 +[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 0f 38 00 c4 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif similarity index 88% rename from cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif rename to cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif index 8ab62db59d..30c32700bf 100644 --- a/cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif @@ -4,7 +4,7 @@ set enable_probestack=false set enable_simd target x86_64 -; ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) +; Ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) function %test_scalar_to_vector_f32() -> f32x4 baldrdash_system_v { block0: v0 = f32const 0x0.42 diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif new file mode 100644 index 0000000000..068fe1b5d2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif @@ -0,0 +1,85 @@ +test legalizer +set enable_simd +target x86_64 skylake + +;; shuffle + +function %test_shuffle_different_ssa_values() -> i8x16 { +block0: + v0 = vconst.i8x16 0x00 + v1 = vconst.i8x16 0x01 + v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0 + return v2 +} +; check: v1 = vconst.i8x16 0x01 +; nextln: v3 = vconst.i8x16 0x80000000000000000000000000000000 +; nextln: v4 = x86_pshufb v0, v3 +; nextln: v5 = vconst.i8x16 0x01808080808080808080808080808080 +; nextln: v6 = x86_pshufb v1, v5 +; nextln: v2 = bor v4, v6 + +function %test_shuffle_same_ssa_value() -> i8x16 { +block0: + v1 = vconst.i8x16 0x01 + v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1 + return v2 +} +; check: v1 = vconst.i8x16 0x01 +; nextln: v3 = vconst.i8x16 0x03000000000000000000000000000000 +; nextln: v2 = x86_pshufb v1, v3 + +;; splat + +function %test_splat_i32() -> i32x4 { +block0: + v0 = iconst.i32 42 + v1 = splat.i32x4 v0 + return v1 +} +; check: block0: +; nextln: v0 = iconst.i32 42 +; nextln: v2 = scalar_to_vector.i32x4 v0 +; nextln: v1 = x86_pshufd v2, 0 +; nextln: return v1 +; nextln: } + +function %test_splat_i64() -> i64x2 { +block0: + v0 = iconst.i64 42 + v1 = splat.i64x2 v0 + return v1 +} +; check: block0: +; nextln: v0 = iconst.i64 42 +; nextln: v2 = scalar_to_vector.i64x2 v0 +; nextln: v1 = x86_pinsr v2, 1, v0 +; nextln: return v1 + +function %test_splat_b16() -> b16x8 { +block0: + v0 = bconst.b16 true + v1 = splat.b16x8 v0 + return v1 +} +; check: block0: +; nextln: v0 = bconst.b16 true +; nextln: v2 = scalar_to_vector.b16x8 v0 +; nextln: v3 = x86_pinsr v2, 1, v0 +; nextln: v4 = raw_bitcast.i32x4 v3 +; nextln: v5 = x86_pshufd v4, 0 +; nextln: v1 = raw_bitcast.b16x8 v5 +; nextln: return v1 + +function %test_splat_i8() -> i8x16 { +block0: + v0 = iconst.i8 42 + v1 = splat.i8x16 v0 + return v1 +} +; check: block0: +; nextln: v2 = iconst.i32 42 +; nextln: v0 = ireduce.i8 v2 +; nextln: v3 = scalar_to_vector.i8x16 v0 +; nextln: v4 = vconst.i8x16 0x00 +; nextln: v1 = x86_pshufb v3, v4 +; nextln: return v1 diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif new file mode 100644 index 0000000000..b3ad77321c --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif @@ -0,0 +1,167 @@ +test run +set enable_simd + +function %test_shuffle_different_ssa_values() -> b1 { +block0: + v0 = vconst.i8x16 0x00 + v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] + v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1 + v3 = extractlane.i8x16 v2, 15 + v4 = iconst.i8 42 + v5 = icmp eq v3, v4 + return v5 +} +; run + +function %test_shuffle_same_ssa_value() -> b1 { +block0: + v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax + v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes + v2 = extractlane.i8x16 v1, 4 + v3 = iconst.i8 0x01 + v4 = icmp eq v2, v3 + return v4 +} +; run + +function %compare_shuffle() -> b1 { +block0: + v1 = vconst.i32x4 [0 1 2 3] + v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + ; keep each lane in place from the first vector + v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] + v4 = raw_bitcast.i32x4 v3 + v5 = extractlane.i32x4 v4, 3 + v6 = icmp_imm eq v5, 3 + v7 = extractlane.i32x4 v4, 0 + v8 = icmp_imm eq v7, 0 + v9 = band v6, v8 + return v9 +} +; run + +function %compare_shuffle() -> b32 { +block0: + v1 = vconst.b32x4 [true false true false] + v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + ; pair up the true values to make the entire vector true + v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11] + v4 = raw_bitcast.b32x4 v3 + v5 = extractlane v4, 3 + v6 = extractlane v4, 0 + v7 = band v5, v6 + return v7 +} +; run + +; TODO once SIMD vector comparison is implemented, remove use of extractlane below + +function %test_insertlane_b8() -> b8 { +block0: + v1 = bconst.b8 true + v2 = vconst.b8x16 [false false false false false false false false false false false false false + false false false] + v3 = insertlane v2, 10, v1 + v4 = extractlane v3, 10 + return v4 +} +; run + +function %test_insertlane_f32() -> b1 { +block0: + v0 = f32const 0x42.42 + v1 = vconst.f32x4 0x00 + v2 = insertlane v1, 1, v0 + v3 = extractlane v2, 1 + v4 = fcmp eq v3, v0 + return v4 +} +; run + +function %test_insertlane_f64_lane1() -> b1 { +block0: + v0 = f64const 0x42.42 + v1 = vconst.f64x2 0x00 + v2 = insertlane v1, 1, v0 + v3 = extractlane v2, 1 + v4 = fcmp eq v3, v0 + return v4 +} +; run + +function %test_insertlane_f64_lane0() -> b1 { +block0: + v0 = f64const 0x42.42 + v1 = vconst.f64x2 0x00 + v2 = insertlane v1, 0, v0 + v3 = extractlane v2, 0 + v4 = fcmp eq v3, v0 + return v4 +} +; run + +function %test_extractlane_b8() -> b8 { +block0: + v1 = vconst.b8x16 [false false false false false false false false false false true false false + false false false] + v2 = extractlane v1, 10 + return v2 +} +; run + +function %test_extractlane_i16() -> b1 { +block0: + v0 = vconst.i16x8 0x00080007000600050004000300020001 + v1 = extractlane v0, 1 + v2 = icmp_imm eq v1, 2 + return v2 +} +; run + +function %test_extractlane_f32() -> b1 { +block0: + v0 = f32const 0x42.42 + v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42] + v2 = extractlane v1, 3 + v3 = fcmp eq v2, v0 + return v3 +} +; run + +function %test_extractlane_i32_with_vector_reuse() -> b1 { +block0: + v0 = iconst.i32 42 + v1 = iconst.i32 99 + + v2 = splat.i32x4 v0 + v3 = insertlane v2, 2, v1 + + v4 = extractlane v3, 3 + v5 = icmp eq v4, v0 + + v6 = extractlane v3, 2 + v7 = icmp eq v6, v1 + + v8 = band v5, v7 + return v8 +} +; run + +function %test_extractlane_f32_with_vector_reuse() -> b1 { +block0: + v0 = f32const 0x42.42 + v1 = f32const 0x99.99 + + v2 = splat.f32x4 v0 + v3 = insertlane v2, 2, v1 + + v4 = extractlane v3, 3 + v5 = fcmp eq v4, v0 + + v6 = extractlane v3, 2 + v7 = fcmp eq v6, v1 + + v8 = band v5, v7 + return v8 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif similarity index 100% rename from cranelift/filetests/filetests/isa/x86/vconst-binemit.clif rename to cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif diff --git a/cranelift/filetests/filetests/isa/x86/compile-vconst.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif similarity index 100% rename from cranelift/filetests/filetests/isa/x86/compile-vconst.clif rename to cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif diff --git a/cranelift/filetests/filetests/isa/x86/vconst-opt.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif similarity index 100% rename from cranelift/filetests/filetests/isa/x86/vconst-opt.clif rename to cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif diff --git a/cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-run.clif similarity index 100% rename from cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif rename to cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-run.clif diff --git a/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif similarity index 100% rename from cranelift/filetests/filetests/isa/x86/vconst-rodata.clif rename to cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif diff --git a/cranelift/filetests/filetests/isa/x86/vconst-run.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-run.clif similarity index 100% rename from cranelift/filetests/filetests/isa/x86/vconst-run.clif rename to cranelift/filetests/filetests/isa/x86/simd-vconst-run.clif