From 7a5646c5f4a411161295dbf98e192cfac189a08a Mon Sep 17 00:00:00 2001
From: dheaton-arm <Damian.Heaton@arm.com>
Date: Tue, 24 Aug 2021 16:08:43 +0100
Subject: [PATCH] Implement `IaddPairwise` for the interpreter

Implemented `IaddPairwise` for the Cranelift interpreter. It adds each pair
of adjacent lanes within each of two SIMD vectors and concatenates the two
sets of sums (preserving both lane size and number of lanes).

Copyright (c) 2021, Arm Limited
---
 .../filetests/runtests/simd-iaddpairwise.clif | 25 ++++++++++++++++
 cranelift/interpreter/src/step.rs             | 30 ++++++++++++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif
diff --git a/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif b/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif
new file mode 100644
index 0000000000..7884ba3b02
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif
@@ -0,0 +1,25 @@
+test interpret
+test run
+target aarch64
+
+function %iaddp_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = iadd_pairwise v0, v1 ; lanes 0-7: sums of adjacent lane pairs of v0; lanes 8-15: same for v1
+    return v2
+}
+; run: %iaddp_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [3 7 11 15 19 23 27 31 35 39 43 47 51 55 59 63]
+
+function %iaddp_i16x8(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = iadd_pairwise v0, v1 ; lanes 0-3: sums of adjacent lane pairs of v0; lanes 4-7: same for v1
+    return v2
+}
+; run: %iaddp_i16x8([1 2 3 4 5 6 7 8], [100 99 98 97 96 95 94 93]) == [3 7 11 15 199 195 191 187]
+
+function %iaddp_i32x4(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = iadd_pairwise v0, v1 ; lanes 0-1: sums of adjacent lane pairs of v0; lanes 2-3: same for v1
+    return v2
+}
+; run: %iaddp_i32x4([1 2 3 4], [5 6 7 8]) == [3 7 11 15]
+; run: %iaddp_i32x4([4294967290 5 4294967290 5], [100 100 100 100]) == [4294967295 4294967295 200 200]
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 497ffb4173..3741b7405a 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -709,7 +709,7 @@ where
         Opcode::Fence => unimplemented!("Fence"),
         Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
         Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
-        Opcode::IaddPairwise => unimplemented!("IaddPairwise"),
+        Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?),
 
         // TODO: these instructions should be removed once the new backend makes these obsolete
         // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
@@ -962,3 +962,31 @@ where
     }
     vectorizelanes(&result, vector_type)
 }
+
+/// Applies the binary `op` to each pair of adjacent lanes within `x`, then
+/// within `y`, and packs the results into a single vector of `vector_type`.
+///
+/// The results computed from `x` come first in the output and those from `y`
+/// follow, so for four-lane inputs the result is
+/// `[op(x0, x1), op(x2, x3), op(y0, y1), op(y2, y3)]`. Any error from lane
+/// extraction, lane value construction or `op` is returned to the caller.
+fn binary_pairwise<V, F>(x: V, y: V, vector_type: types::Type, op: F) -> ValueResult<V>
+where
+    V: Value,
+    F: Fn(V, V) -> ValueResult<V>,
+{
+    let lane_type = vector_type.lane_type();
+    let arg0 = extractlanes(&x, lane_type)?;
+    let arg1 = extractlanes(&y, lane_type)?;
+    let mut result = SimdVec::new();
+    // Walk the pairs of `x` first and then those of `y`; chaining the two
+    // iterators yields the concatenated lane order with a single loop body.
+    for pair in arg0.chunks(2).chain(arg1.chunks(2)) {
+        // `pair[1]` relies on an even lane count; a trailing single-lane chunk would panic.
+        let lhs: V = Value::int(pair[0], lane_type)?;
+        let rhs: V = Value::int(pair[1], lane_type)?;
+        // `op` works on `V`, while the lanes are collected as raw integers.
+        result.push(op(lhs, rhs)?.into_int()?);
+    }
+    vectorizelanes(&result, vector_type)
+}