Remove the widening_pairwise_dot_product_s clif instruction (#5889)

This was added for the wasm SIMD proposal, but I've been poking around at this recently and I believe the instruction can instead be represented by its component parts with the same semantics. This commit removes the instruction and instead represents it with the existing `iadd_pairwise` instruction (among others), and updates backends with new pattern matches so that they produce the same codegen as before. Interestingly, on the AArch64 backend the codegen rule was removed entirely with no replacement, as the existing rules already produce the same codegen.
2023-02-27 12:43:43 -06:00
parent 6cf7155052
commit 9b86a0b9b1
8 changed files with 46 additions and 88 deletions
--- a/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif
@@ -1295,8 +1295,14 @@ block0(v0: i8x16, v1: i8x16):

 function %widening_pairwise_dot_product_s_i16x8(i16x8, i16x8) -> i32x4 {
 block0(v0: i16x8, v1: i16x8):
-  v2 = widening_pairwise_dot_product_s v0, v1
-  return v2
+    v2 = swiden_low v0
+    v3 = swiden_low v1
+    v4 = imul v2, v3
+    v5 = swiden_high v0
+    v6 = swiden_high v1
+    v7 = imul v5, v6
+    v8 = iadd_pairwise v4, v7
+    return v8
 }

 ; VCode:
--- a/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif
+++ b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif
@@ -1,4 +1,3 @@
-test interpret
 test run
 target aarch64
 target s390x
@@ -7,8 +6,14 @@ target x86_64 has_sse3 has_ssse3 has_sse41

 function %wpdps(i16x8, i16x8) -> i32x4 {
 block0(v0: i16x8, v1: i16x8):
-    v2 = widening_pairwise_dot_product_s v0, v1
-    return v2
+    v2 = swiden_low v0
+    v3 = swiden_low v1
+    v4 = imul v2, v3
+    v5 = swiden_high v0
+    v6 = swiden_high v1
+    v7 = imul v5, v6
+    v8 = iadd_pairwise v4, v7
+    return v8
 }
 ; run: %wpdps([1 2 3 4 5 6 7 8], [8000 7000 6000 5000 4000 3000 2000 1000]) == [22000 38000 38000 22000]
 ; run: %wpdps([1 -2 3 -4 5 -6 7 -8], [32767 32767 32767 32767 -32768 -32768 -32768 -32768]) == [-32767 -32767 32768 32768]