From 3b9bfc818731d7566279e654ea46525ec6e9e509 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Mon, 6 Sep 2021 15:03:32 +0100 Subject: [PATCH 1/2] Implement `WideningPairwiseDotProductS` for interpreter Implemented `WideningPairwiseDotProductS` to perform sign-extending length-doubling multiplication on corresponding elements from two `i16x8` SIMD vectors, performing a pairwise add on the results (thus returning `i32x4`). Copyright (c) 2021, Arm Limited --- .../simd-wideningpairwisedotproducts.clif | 14 ++++++++++++++ cranelift/interpreter/src/step.rs | 16 +++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif diff --git a/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif new file mode 100644 index 0000000000..56987ef79d --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif @@ -0,0 +1,14 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %wpdps(i16x8, i16x8) -> i32x4 { +block0(v0: i16x8, v1: i16x8): + v2 = widening_pairwise_dot_product_s v0, v1 + return v2 +} +; run: %wpdps([1 2 3 4 5 6 7 8], [8000 7000 6000 5000 4000 3000 2000 1000]) == [22000 38000 38000 22000] +; run: %wpdps([1 -2 3 -4 5 -6 7 -8], [32767 32767 32767 32767 -32768 -32768 -32768 -32768]) == [-32767 -32767 32768 32768] +; run: %wpdps([-32768 -32768 32767 32767 -32768 -32768 32767 32767], [-32768 -32768 32767 32767 32767 32767 -32768 -32768]) == [2147483648 2147352578 -2147418112 -2147418112] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index ce53528c20..5157084330 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -845,7 +845,21 @@ where Opcode::AtomicLoad => unimplemented!("AtomicLoad"), Opcode::AtomicStore => 
unimplemented!("AtomicStore"), Opcode::Fence => unimplemented!("Fence"), - Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"), + Opcode::WideningPairwiseDotProductS => { + let ctrl_ty = types::I16X8; + let new_type = ctrl_ty.merge_lanes().unwrap(); + let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let mut new_vec = SimdVec::new(); + for (x, y) in arg0.chunks(2).into_iter().zip(arg1.chunks(2).into_iter()) { + let mut z = 0i128; + for (lhs, rhs) in x.into_iter().zip(y.into_iter()) { + z += lhs.clone().into_int()? * rhs.clone().into_int()?; + } + new_vec.push(Value::int(z, new_type.lane_type())?); + } + assign(vectorizelanes(&new_vec, new_type)?) + } Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), From 8abb19cbd81a0f4562e3de2ed1e668f734879bce Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Mon, 20 Sep 2021 10:31:34 +0100 Subject: [PATCH 2/2] Generate `new_vec` using an iterator chain Copyright (c) 2021, Arm Limited --- cranelift/interpreter/src/step.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 5157084330..4efc5b88d7 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -850,14 +850,19 @@ where let new_type = ctrl_ty.merge_lanes().unwrap(); let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; - let mut new_vec = SimdVec::new(); - for (x, y) in arg0.chunks(2).into_iter().zip(arg1.chunks(2).into_iter()) { - let mut z = 0i128; - for (lhs, rhs) in x.into_iter().zip(y.into_iter()) { - z += lhs.clone().into_int()? 
* rhs.clone().into_int()?; - } - new_vec.push(Value::int(z, new_type.lane_type())?); - } + let new_vec = arg0 + .chunks(2) + .into_iter() + .zip(arg1.chunks(2)) + .into_iter() + .map(|(x, y)| { + let mut z = 0i128; + for (lhs, rhs) in x.into_iter().zip(y.into_iter()) { + z += lhs.clone().into_int()? * rhs.clone().into_int()?; + } + Value::int(z, new_type.lane_type()) + }) + .collect::<ValueResult<Vec<_>>>()?; assign(vectorizelanes(&new_vec, new_type)?) } Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),