Remove the widening_pairwise_dot_product_s clif instruction (#5889)

This was added for the wasm SIMD proposal, but I've been poking around at this recently and I believe the instruction can instead be represented by its component parts with the same semantics. This commit removes the instruction, represents it instead with the existing `iadd_pairwise` instruction (among others), and updates backends with new pattern matches so that they have the same codegen as before. Interestingly, on the AArch64 backend the codegen rule was removed entirely with no replacement, as the existing rules already produce the same codegen.
2023-02-27 12:43:43 -06:00
parent 6cf7155052
commit 9b86a0b9b1
8 changed files with 46 additions and 88 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -204,15 +204,19 @@

 ;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

+;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction
 (rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high x))))
      (saddlp8 x))

+;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction
 (rule (lower (has_type $I32X4 (iadd_pairwise (swiden_low x) (swiden_high x))))
      (saddlp16 x))

+;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction
 (rule (lower (has_type $I16X8 (iadd_pairwise (uwiden_low x) (uwiden_high x))))
      (uaddlp8 x))

+;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction
 (rule (lower (has_type $I32X4 (iadd_pairwise (uwiden_low x) (uwiden_high x))))
      (uaddlp16 x))

@@ -2030,18 +2034,6 @@
      (let ((tmp Reg (fpu_move_from_vec x 1 (VectorSize.Size32x2))))
       (vec_extend (VecExtendOp.Uxtl) tmp $false (lane_size ty))))

-;;;; Rules for `widening_pairwise_dot_product_s` ;;;;;;;;;;;;;;;;;;;;;;
-
-;; The args have type I16X8.
-;; "dst = i32x4.dot_i16x8_s(x, y)"
-;; => smull  tmp, x, y
-;;    smull2 dst, x, y
-;;    addp   dst, tmp, dst
-(rule (lower (has_type $I32X4 (widening_pairwise_dot_product_s x y)))
-      (let ((tmp Reg (vec_rrr_long (VecRRRLongOp.Smull16) x y $false))
-            (dst Reg (vec_rrr_long (VecRRRLongOp.Smull16) x y $true)))
-       (vec_rrr (VecALUOp.Addp) tmp dst (VectorSize.Size32x4))))
-
 ;;;; Rules for `Fence` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (fence))
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -138,6 +138,14 @@
                             (vec_add ty x (vec_lshr_by_byte x size))
                             (vec_add ty y (vec_lshr_by_byte y size)))))

+;; special case for the `i32x4.dot_i16x8_s` wasm instruction
+(rule 1 (lower
+        (has_type dst_ty (iadd_pairwise
+                           (imul (swiden_low x @ (value_type src_ty)) (swiden_low y))
+                           (imul (swiden_high x) (swiden_high y)))))
+      (vec_add dst_ty (vec_smul_even src_ty x y)
+                      (vec_smul_odd src_ty x y)))
+

 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -484,15 +492,6 @@
        (mov_to_vec128 $I64X2 res_0 res_1)))


-;;;; Rules for `widening_pairwise_dot_product_s` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Widening pairwise dot product of two vector registers.
-(rule (lower (has_type dst_ty (widening_pairwise_dot_product_s
-                                 x @ (value_type src_ty) y)))
-      (vec_add dst_ty (vec_smul_even src_ty x y)
-                      (vec_smul_odd src_ty x y)))
-
-
 ;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Fixed-point multiplication of two vector registers.
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -2147,12 +2147,6 @@
 (rule (lower (debugtrap))
      (side_effect (x64_hlt)))

-;; Rules for `widening_pairwise_dot_product_s` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type $I32X4
-                       (widening_pairwise_dot_product_s x y)))
-      (x64_pmaddwd x y))
-
 ;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type $F32 (fadd x y)))
@@ -3179,6 +3173,7 @@

 ;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

+;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction
 (rule (lower
        (has_type $I16X8 (iadd_pairwise
                           (swiden_low val @ (value_type $I8X16))
@@ -3186,6 +3181,7 @@
      (let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16))))
        (x64_pmaddubsw mul_const val)))

+;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction
 (rule (lower
        (has_type $I32X4 (iadd_pairwise
                           (swiden_low val @ (value_type $I16X8))
@@ -3193,6 +3189,7 @@
      (let ((mul_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32))))
        (x64_pmaddwd val mul_const)))

+;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction
 (rule (lower
        (has_type $I16X8 (iadd_pairwise
                           (uwiden_low val @ (value_type $I8X16))
@@ -3200,6 +3197,7 @@
      (let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16))))
        (x64_pmaddubsw val mul_const)))

+;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction
 (rule (lower
        (has_type $I32X4 (iadd_pairwise
                           (uwiden_low val @ (value_type $I16X8))
@@ -3213,6 +3211,13 @@
            (addd_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_addd_const_32))))
        (x64_paddd dst addd_const)))

+;; special case for the `i32x4.dot_i16x8_s` wasm instruction
+(rule (lower
+        (has_type $I32X4 (iadd_pairwise
+                           (imul (swiden_low x) (swiden_low y))
+                           (imul (swiden_high x) (swiden_high y)))))
+      (x64_pmaddwd x y))
+
 ;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type $I16X8 (swiden_low val @ (value_type $I8X16))))