From 224a4b4094b19819a4f08b7cf75e58db5e9a4256 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Tue, 14 Sep 2021 12:28:21 +0100 Subject: [PATCH 1/3] Implement `VhighBits` & `Vselect` for interpreter Implemented the following Opcodes for the Cranelift interpreter: - `VhighBits` to reduce a vector to a scalar integer formed by concatenating the MSB of each lane. - `Vselect` to select lanes from two vectors controlled by a boolean vector. Copyright (c) 2021, Arm Limited --- .../filetests/runtests/simd-vhighbits.clif | 53 +++++++++++++++++++ .../filetests/runtests/simd-vselect.clif | 29 ++++++++++ cranelift/interpreter/src/step.rs | 31 ++++++++++- 3 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-vhighbits.clif diff --git a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif new file mode 100644 index 0000000000..2a9c5d1a75 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif @@ -0,0 +1,53 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %vhighbits_i8x16(i8x16) -> i16 { +block0(v0: i8x16): + v1 = vhigh_bits.i16 v0 + return v1 +} +; run: %vhighbits_i8x16([0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i8x16([0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i8x16([1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7]) == 0 +; run: %vhighbits_i8x16([128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128]) == -1 +; run: %vhighbits_i8x16([128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 8]) == 32767 + +function %vhighbits_i16x8(i16x8) -> i8 { +block0(v0: i16x8): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i16x8([0 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i16x8([0 0 0 0 0 0 0 1]) == 0 +; run: %vhighbits_i16x8([1 2 3 4 5 6 7 8]) == 0 +; run: %vhighbits_i16x8([128 128 128 128 128 128 128 128]) == 0 +; run: %vhighbits_i16x8([32768 32768 32768 32768 32768 32768 32768 0]) == 127 + + +function %vhighbits_i32x4(i32x4) -> i8 { +block0(v0: i32x4): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i32x4([0 0 0 0]) == 0 +; run: %vhighbits_i32x4([0 0 0 1]) == 0 +; run: %vhighbits_i32x4([1 2 3 4]) == 0 +; run: %vhighbits_i32x4([128 128 128 128]) == 0 +; run: %vhighbits_i32x4([2147483648 2147483648 2147483648 2147483648]) == 15 +; run: %vhighbits_i32x4([2147483648 0 2147483648 0]) == 5 + + +function %vhighbits_i64x2(i64x2) -> i8 { +block0(v0: i64x2): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i64x2([0 0]) == 0 +; run: %vhighbits_i64x2([0 1]) == 0 +; run: %vhighbits_i64x2([1 2]) == 0 +; run: %vhighbits_i64x2([128 128]) == 0 +; run: %vhighbits_i64x2([18446744073709551615 18446744073709551615]) == 3 +; run: %vhighbits_i64x2([18446744073709551615 0]) == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-vselect.clif b/cranelift/filetests/filetests/runtests/simd-vselect.clif index 3817b2302f..8a78664f5b 100644 --- a/cranelift/filetests/filetests/runtests/simd-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-vselect.clif @@ -1,3 +1,4 @@ +test interpret test run ; target s390x TODO: Not yet implemented on s390x target aarch64 @@ -45,3 +46,31 @@ block0: return v4 } ; run: %vselect_i64x2() == [200 101] + +function %vselect_p_i8x16(b8x16, i8x16, i8x16) -> i8x16 { +block0(v0: b8x16, v1: i8x16, v2: i8x16): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i8x16([true false true true true false false false true false true true true false false false], [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [1 18 3 4 5 22 23 24 9 26 11 12 13 30 31 32] + +function %vselect_p_i16x8(b16x8, i16x8, i16x8) -> i16x8 { +block0(v0: b16x8, v1: i16x8, v2: i16x8): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i16x8([true false true true true false false false], [1 2 3 4 5 6 7 8], [17 18 19 20 21 22 23 24]) == [1 18 3 4 5 22 23 24] + +function %vselect_p_i32x4(b32x4, i32x4, i32x4) -> i32x4 { +block0(v0: b32x4, v1: i32x4, v2: i32x4): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i32x4([true false true true], [1 2 3 4], [100000 200000 300000 400000]) == [1 200000 3 4] + +function %vselect_p_i64x2(b64x2, i64x2, i64x2) -> i64x2 { +block0(v0: b64x2, v1: i64x2, v2: i64x2): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i64x2([true false], [1 2], [100000000000 200000000000]) == [1 200000000000] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 6b3eb59ff8..32457d4462 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -824,10 +824,37 @@ where let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; assign(lanes[idx].clone()) } - Opcode::VhighBits => unimplemented!("VhighBits"), + Opcode::VhighBits => { + // `ctrl_ty` controls the return type for this, so the input type + // must be retrieved via `inst_context`. + let lane_type = inst_context + .type_of(inst_context.args()[0]) + .unwrap() + .lane_type(); + let a = extractlanes(&arg(0)?, lane_type)?; + let mut result: i128 = 0; + for (i, val) in a.into_iter().enumerate() { + let val = val.reverse_bits()?.into_int()?; // MSB -> LSB + result |= (val & 1) << i; + } + assign(Value::int(result, ctrl_ty)?) + } Opcode::Vsplit => unimplemented!("Vsplit"), Opcode::Vconcat => unimplemented!("Vconcat"), - Opcode::Vselect => unimplemented!("Vselect"), + Opcode::Vselect => { + let c = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let x = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let y = extractlanes(&arg(2)?, ctrl_ty.lane_type())?; + let mut new_vec = SimdVec::new(); + for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) { + if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? { + new_vec.push(y); + } else { + new_vec.push(x); + } + } + assign(vectorizelanes(&new_vec, ctrl_ty)?) + } Opcode::VanyTrue => assign(fold_vector( arg(0)?, ctrl_ty, From 335177a97e1365d7b8e0091647d521a8a37bfa26 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 22 Sep 2021 09:42:18 +0100 Subject: [PATCH 2/3] Remove legacy backend from test Copyright (c) 2021, Arm Limited --- cranelift/filetests/filetests/runtests/simd-vselect.clif | 2 -- 1 file changed, 2 deletions(-) diff --git a/cranelift/filetests/filetests/runtests/simd-vselect.clif b/cranelift/filetests/filetests/runtests/simd-vselect.clif index 8a78664f5b..5c910df60d 100644 --- a/cranelift/filetests/filetests/runtests/simd-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-vselect.clif @@ -4,8 +4,6 @@ test run target aarch64 set enable_simd target x86_64 machinst -set enable_simd -target x86_64 legacy haswell function %vselect_i8x16() -> i8x16 { block0: From 63d85e1dc3dc864a10d748c3d4465ef17709319c Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 22 Sep 2021 11:43:57 +0100 Subject: [PATCH 3/3] Prevent running `simd-vhighbits.clif` on legacy backend. Copyright (c) 2021, Arm Limited. --- cranelift/filetests/filetests/runtests/simd-vhighbits.clif | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif index 2a9c5d1a75..a5d7146f11 100644 --- a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif +++ b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 +target x86_64 machinst function %vhighbits_i8x16(i8x16) -> i16 { block0(v0: i8x16):