diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 56a135072a..a54a9818ca 100755
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3134,41 +3134,6 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
-    let I16x8 = &TypeVar::new(
-        "I16x8",
-        "A SIMD vector type containing 8 integer lanes each 16 bits wide.",
-        TypeSetBuilder::new()
-            .ints(16..16)
-            .simd_lanes(8..8)
-            .includes_scalars(false)
-            .build(),
-    );
-
-    let x = &Operand::new("x", I16x8);
-    let y = &Operand::new("y", I16x8);
-    let a = &Operand::new("a", &I16x8.merge_lanes());
-
-    ig.push(
-        Inst::new(
-            "widening_pairwise_dot_product_s",
-            r#"
-        Takes corresponding elements in `x` and `y`, performs a sign-extending length-doubling
-        multiplication on them, then adds adjacent pairs of elements to form the result.  For
-        example, if the input vectors are `[x3, x2, x1, x0]` and `[y3, y2, y1, y0]`, it produces
-        the vector `[r1, r0]`, where `r1 = sx(x3) * sx(y3) + sx(x2) * sx(y2)` and
-        `r0 = sx(x1) * sx(y1) + sx(x0) * sx(y0)`, and `sx(n)` sign-extends `n` to twice its width.
-
-        This will double the lane width and halve the number of lanes.  So the resulting
-        vector has the same number of bits as `x` and `y` do (individually).
-
-        See <https://github.com/WebAssembly/simd/pull/127> for background info.
-            "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
     let IntTo = &TypeVar::new(
         "IntTo",
         "A larger integer type with the same number of lanes",
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index fca811ea82..3eec28c118 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -204,15 +204,19 @@
 
 ;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction
 (rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high x))))
       (saddlp8 x))
 
+;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction
 (rule (lower (has_type $I32X4 (iadd_pairwise (swiden_low x) (swiden_high x))))
       (saddlp16 x))
 
+;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction
 (rule (lower (has_type $I16X8 (iadd_pairwise (uwiden_low x) (uwiden_high x))))
       (uaddlp8 x))
 
+;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction
 (rule (lower (has_type $I32X4 (iadd_pairwise (uwiden_low x) (uwiden_high x))))
       (uaddlp16 x))
 
@@ -2030,18 +2034,6 @@
       (let ((tmp Reg (fpu_move_from_vec x 1 (VectorSize.Size32x2))))
        (vec_extend (VecExtendOp.Uxtl) tmp $false (lane_size ty))))
 
-;;;; Rules for `widening_pairwise_dot_product_s` ;;;;;;;;;;;;;;;;;;;;;;
-
-;; The args have type I16X8.
-;; "dst = i32x4.dot_i16x8_s(x, y)"
-;; => smull  tmp, x, y
-;;    smull2 dst, x, y
-;;    addp   dst, tmp, dst
-(rule (lower (has_type $I32X4 (widening_pairwise_dot_product_s x y)))
-      (let ((tmp Reg (vec_rrr_long (VecRRRLongOp.Smull16) x y $false))
-            (dst Reg (vec_rrr_long (VecRRRLongOp.Smull16) x y $true)))
-       (vec_rrr (VecALUOp.Addp) tmp dst (VectorSize.Size32x4))))
-
 ;;;; Rules for `Fence` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (fence))
diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle
index ed725d1dc3..84e10b4fda 100644
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -138,6 +138,14 @@
                              (vec_add ty x (vec_lshr_by_byte x size))
                              (vec_add ty y (vec_lshr_by_byte y size)))))
 
+;; Special case for wasm `i32x4.dot_i16x8_s`: sum adjacent sign-extended products.
+(rule 1 (lower ;; priority 1: wins over any overlapping default-priority `iadd_pairwise` rule
+        (has_type dst_ty (iadd_pairwise
+                           (imul (swiden_low x @ (value_type src_ty)) (swiden_low y))
+                           (imul (swiden_high x) (swiden_high y)))))
+      (vec_add dst_ty (vec_smul_even src_ty x y)
+                      (vec_smul_odd src_ty x y)))
+
 
 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -484,15 +492,6 @@
         (mov_to_vec128 $I64X2 res_0 res_1)))
 
 
-;;;; Rules for `widening_pairwise_dot_product_s` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Widening pairwise dot product of two vector registers.
-(rule (lower (has_type dst_ty (widening_pairwise_dot_product_s
-                                 x @ (value_type src_ty) y)))
-      (vec_add dst_ty (vec_smul_even src_ty x y)
-                      (vec_smul_odd src_ty x y)))
-
-
 ;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Fixed-point multiplication of two vector registers.
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index dcc8bd1f0f..84f9f3ef80 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -2147,12 +2147,6 @@
 (rule (lower (debugtrap))
       (side_effect (x64_hlt)))
 
-;; Rules for `widening_pairwise_dot_product_s` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type $I32X4
-                       (widening_pairwise_dot_product_s x y)))
-      (x64_pmaddwd x y))
-
 ;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (fadd x y)))
@@ -3179,6 +3173,7 @@
 
 ;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction
 (rule (lower
         (has_type $I16X8 (iadd_pairwise
                            (swiden_low val @ (value_type $I8X16))
@@ -3186,6 +3181,7 @@
       (let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16))))
         (x64_pmaddubsw mul_const val)))
 
+;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction
 (rule (lower
         (has_type $I32X4 (iadd_pairwise
                            (swiden_low val @ (value_type $I16X8))
@@ -3193,6 +3189,7 @@
       (let ((mul_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32))))
         (x64_pmaddwd val mul_const)))
 
+;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction
 (rule (lower
         (has_type $I16X8 (iadd_pairwise
                            (uwiden_low val @ (value_type $I8X16))
@@ -3200,6 +3197,7 @@
       (let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16))))
         (x64_pmaddubsw val mul_const)))
 
+;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction
 (rule (lower
         (has_type $I32X4 (iadd_pairwise
                            (uwiden_low val @ (value_type $I16X8))
@@ -3213,6 +3211,13 @@
             (addd_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_addd_const_32))))
         (x64_paddd dst addd_const)))
 
+;; Special case for wasm `i32x4.dot_i16x8_s`: the whole pattern is one `x64_pmaddwd`.
+(rule (lower
+        (has_type $I32X4 (iadd_pairwise
+                           (imul (swiden_low x) (swiden_low y))    ;; low-half products
+                           (imul (swiden_high x) (swiden_high y))))) ;; high-half products
+      (x64_pmaddwd x y))
+
 ;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (swiden_low val @ (value_type $I8X16))))
diff --git a/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif
index bb8da42fe5..960477cbbd 100644
--- a/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif
@@ -1295,8 +1295,14 @@ block0(v0: i8x16, v1: i8x16):
 
 function %widening_pairwise_dot_product_s_i16x8(i16x8, i16x8) -> i32x4 {
 block0(v0: i16x8, v1: i16x8):
-  v2 = widening_pairwise_dot_product_s v0, v1
-  return v2
+    v2 = swiden_low v0
+    v3 = swiden_low v1
+    v4 = imul v2, v3
+    v5 = swiden_high v0
+    v6 = swiden_high v1
+    v7 = imul v5, v6
+    v8 = iadd_pairwise v4, v7
+    return v8
 }
 
 ; VCode:
diff --git a/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif
index c38099c429..205f431811 100644
--- a/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif
+++ b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif
@@ -1,4 +1,3 @@
-test interpret
 test run
 target aarch64
 target s390x
@@ -7,8 +6,14 @@ target x86_64 has_sse3 has_ssse3 has_sse41
 
 function %wpdps(i16x8, i16x8) -> i32x4 {
 block0(v0: i16x8, v1: i16x8):
-    v2 = widening_pairwise_dot_product_s v0, v1
-    return v2
+    v2 = swiden_low v0
+    v3 = swiden_low v1
+    v4 = imul v2, v3
+    v5 = swiden_high v0
+    v6 = swiden_high v1
+    v7 = imul v5, v6
+    v8 = iadd_pairwise v4, v7
+    return v8
 }
 ; run: %wpdps([1 2 3 4 5 6 7 8], [8000 7000 6000 5000 4000 3000 2000 1000]) == [22000 38000 38000 22000]
 ; run: %wpdps([1 -2 3 -4 5 -6 7 -8], [32767 32767 32767 32767 -32768 -32768 -32768 -32768]) == [-32767 -32767 32768 32768]
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index e64d90838a..d4da0a2d2c 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -1308,26 +1308,6 @@ where
             // actually need to emit a fence here.
             ControlFlow::Continue
         }
-        Opcode::WideningPairwiseDotProductS => {
-            let ctrl_ty = types::I16X8;
-            let new_type = ctrl_ty.merge_lanes().unwrap();
-            let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
-            let arg1 = extractlanes(&arg(1)?, ctrl_ty)?;
-            let new_vec = arg0
-                .chunks(2)
-                .into_iter()
-                .zip(arg1.chunks(2))
-                .into_iter()
-                .map(|(x, y)| {
-                    let mut z = 0i128;
-                    for (lhs, rhs) in x.into_iter().zip(y.into_iter()) {
-                        z += lhs.clone().into_int()? * rhs.clone().into_int()?;
-                    }
-                    Value::int(z, new_type.lane_type())
-                })
-                .collect::<ValueResult<Vec<_>>>()?;
-            assign(vectorizelanes(&new_vec, new_type)?)
-        }
         Opcode::SqmulRoundSat => {
             let lane_type = ctrl_ty.lane_type();
             let double_width = ctrl_ty.double_width().unwrap().lane_type();
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index fc5c05c66d..94301f9327 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -2059,7 +2059,13 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         }
         Operator::I32x4DotI16x8S => {
             let (a, b) = pop2_with_bitcast(state, I16X8, builder);
-            state.push1(builder.ins().widening_pairwise_dot_product_s(a, b));
+            let alow = builder.ins().swiden_low(a); // expand: widen, multiply, pairwise-add
+            let blow = builder.ins().swiden_low(b);
+            let low = builder.ins().imul(alow, blow); // products of the low lanes
+            let ahigh = builder.ins().swiden_high(a); // same steps for the high lanes
+            let bhigh = builder.ins().swiden_high(b);
+            let high = builder.ins().imul(ahigh, bhigh);
+            state.push1(builder.ins().iadd_pairwise(low, high)); // sum adjacent products
         }
         Operator::I8x16Popcnt => {
             let arg = pop1_with_bitcast(state, type_of(op), builder);