[AArch64] Port IaddPairwise to ISLE (#4201)

2022-06-06 15:37:13 +01:00
parent 7148882867
commit acfeda4d80
7 changed files with 150 additions and 50 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -920,7 +920,9 @@

 ;; Helper for calculating the `VectorSize` corresponding to a type
 (decl vector_size (Type) VectorSize)
+(rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
 (rule (vector_size (multi_lane 8 16)) (VectorSize.Size8x16))
+(rule (vector_size (multi_lane 16 4)) (VectorSize.Size16x4))
 (rule (vector_size (multi_lane 16 8)) (VectorSize.Size16x8))
 (rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
 (rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
@@ -1540,6 +1542,13 @@
            (_ Unit (emit (MInst.VecRRRLong op dst src1 src2 high_half))))
        dst))

+;; Helper for emitting `MInst.VecRRPairLong` instructions.
+(decl vec_rr_pair_long (VecRRPairLongOp Reg) Reg)
+(rule (vec_rr_pair_long op src)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecRRPairLong op dst src))))
+        dst))
+
 ;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
 ;; where the operation both reads and modifies the destination register.
 ;;
@@ -1729,6 +1738,20 @@
 (decl shll32 (Reg bool) Reg)
 (rule (shll32 x high_half) (vec_rr_long (VecRRLongOp.Shll32) x high_half))

+;; Helpers for generating `saddlp` and `uaddlp` instructions.
+
+(decl saddlp8 (Reg) Reg)
+(rule (saddlp8 x) (vec_rr_pair_long (VecRRPairLongOp.Saddlp8) x))
+
+(decl saddlp16 (Reg) Reg)
+(rule (saddlp16 x) (vec_rr_pair_long (VecRRPairLongOp.Saddlp16) x))
+
+(decl uaddlp8 (Reg) Reg)
+(rule (uaddlp8 x) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp8) x))
+
+(decl uaddlp16 (Reg) Reg)
+(rule (uaddlp16 x) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp16) x))
+
 ;; Helper for generating `umlal32` instructions.
 (decl umlal32 (Reg Reg Reg bool) Reg)
 (rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongOp.Umlal32) x y z high_half))
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -99,6 +99,27 @@
          (add_with_flags_paired $I64 x_lo y_lo)
          (adc_paired $I64 x_hi y_hi))))

+;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high y))))
+      (if-let z (same_value x y))
+      (saddlp8 z))
+
+(rule (lower (has_type $I32X4 (iadd_pairwise (swiden_low x) (swiden_high y))))
+      (if-let z (same_value x y))
+      (saddlp16 z))
+
+(rule (lower (has_type $I16X8 (iadd_pairwise (uwiden_low x) (uwiden_high y))))
+      (if-let z (same_value x y))
+      (uaddlp8 z))
+
+(rule (lower (has_type $I32X4 (iadd_pairwise (uwiden_low x) (uwiden_high y))))
+      (if-let z (same_value x y))
+      (uaddlp16 z))
+
+(rule (lower (has_type ty (iadd_pairwise x y)))
+      (addp x y (vector_size ty)))
+
 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; `i64` and smaller
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1357,56 +1357,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            });
        }

-        Opcode::IaddPairwise => {
-            let ty = ty.unwrap();
-            let lane_type = ty.lane_type();
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-            let mut match_long_pair = |ext_low_op, ext_high_op| -> Option<(VecRRPairLongOp, Reg)> {
-                if let Some(lhs) = maybe_input_insn(ctx, inputs[0], ext_low_op) {
-                    if let Some(rhs) = maybe_input_insn(ctx, inputs[1], ext_high_op) {
-                        let lhs_inputs = insn_inputs(ctx, lhs);
-                        let rhs_inputs = insn_inputs(ctx, rhs);
-                        let low = put_input_in_reg(ctx, lhs_inputs[0], NarrowValueMode::None);
-                        let high = put_input_in_reg(ctx, rhs_inputs[0], NarrowValueMode::None);
-                        if low == high {
-                            match (lane_type, ext_low_op) {
-                                (I16, Opcode::SwidenLow) => {
-                                    return Some((VecRRPairLongOp::Saddlp8, low))
-                                }
-                                (I32, Opcode::SwidenLow) => {
-                                    return Some((VecRRPairLongOp::Saddlp16, low))
-                                }
-                                (I16, Opcode::UwidenLow) => {
-                                    return Some((VecRRPairLongOp::Uaddlp8, low))
-                                }
-                                (I32, Opcode::UwidenLow) => {
-                                    return Some((VecRRPairLongOp::Uaddlp16, low))
-                                }
-                                _ => (),
-                            };
-                        }
-                    }
-                }
-                None
-            };
-
-            if let Some((op, rn)) = match_long_pair(Opcode::SwidenLow, Opcode::SwidenHigh) {
-                ctx.emit(Inst::VecRRPairLong { op, rd, rn });
-            } else if let Some((op, rn)) = match_long_pair(Opcode::UwidenLow, Opcode::UwidenHigh) {
-                ctx.emit(Inst::VecRRPairLong { op, rd, rn });
-            } else {
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                ctx.emit(Inst::VecRRR {
-                    alu_op: VecALUOp::Addp,
-                    rd,
-                    rn,
-                    rm,
-                    size: VectorSize::from_ty(ty),
-                });
-            }
-        }
+        Opcode::IaddPairwise => implemented_in_isle(ctx),

        Opcode::WideningPairwiseDotProductS => {
            let r_y = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -29,6 +29,15 @@ pub type BoxExternalName = Box<ExternalName>;
 #[doc(hidden)]
 macro_rules! isle_prelude_methods {
    () => {
+        #[inline]
+        fn same_value(&mut self, a: Value, b: Value) -> Option<Value> {
+            if a == b {
+                Some(a)
+            } else {
+                None
+            }
+        }
+
        #[inline]
        fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
            let [a, b] = *arr;
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -371,6 +371,10 @@
 (extractor (unwrap_head_value_list_2 head1 head2 tail)
           (value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail))))

+;; Constructor to test whether two values are the same.
+(decl pure same_value (Value Value) Value)
+(extern constructor same_value same_value)
+
 ;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
 (decl writable_reg_to_reg (WritableReg) Reg)
 (extern constructor writable_reg_to_reg writable_reg_to_reg)
--- a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif
@@ -107,3 +107,53 @@ block0(v0: i8x16):
 ;   addp v0.8h, v2.8h, v4.8h
 ;   ret

+function %fn9(i8x8, i8x8) -> i8x8 {
+block0(v0: i8x8, v1: i8x8):
+  v2 = iadd_pairwise v0, v1
+  return v2
+}
+
+; block0:
+;   addp v0.8b, v0.8b, v1.8b
+;   ret
+
+function %fn10(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+  v2 = iadd_pairwise v0, v1
+  return v2
+}
+
+; block0:
+;   addp v0.16b, v0.16b, v1.16b
+;   ret
+
+function %fn11(i16x4, i16x4) -> i16x4 {
+block0(v0: i16x4, v1: i16x4):
+  v2 = iadd_pairwise v0, v1
+  return v2
+}
+
+; block0:
+;   addp v0.4h, v0.4h, v1.4h
+;   ret
+
+function %fn12(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+  v2 = iadd_pairwise v0, v1
+  return v2
+}
+
+; block0:
+;   addp v0.8h, v0.8h, v1.8h
+;   ret
+
+function %fn14(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+  v2 = iadd_pairwise v0, v1
+  return v2
+}
+
+; block0:
+;   addp v0.4s, v0.4s, v1.4s
+;   ret
+
--- a/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif
+++ b/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif
@@ -23,3 +23,45 @@ block0(v0: i32x4, v1: i32x4):
 }
 ; run: %iaddp_i32x4([1 2 3 4], [5 6 7 8]) == [3 7 11 15]
 ; run: %iaddp_i32x4([4294967290 5 4294967290 5], [100 100 100 100]) == [4294967295 4294967295 200 200]
+
+function %swiden_i8x16(i8x16) -> i16x8 {
+block0(v0: i8x16):
+  v1 = swiden_low v0
+  v2 = swiden_high v0
+  v3 = iadd_pairwise v1, v2
+  return v3
+}
+; run: %swiden_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [3 7 11 15 19 23 27 31]
+; run: %swiden_i8x16([-1 2 -3 4 -5 6 -7 8 -9 10 -11 12 -13 14 -15 16]) == [1 1 1 1 1 1 1 1]
+; run: %swiden_i8x16([127 1 126 2 125 3 124 4 123 5 122 6 121 7 120 8]) == [128 128 128 128 128 128 128 128]
+
+function %uwiden_i8x16(i8x16) -> i16x8 {
+block0(v0: i8x16):
+  v1 = uwiden_low v0
+  v2 = uwiden_high v0
+  v3 = iadd_pairwise v1, v2
+  return v3
+}
+; run: %uwiden_i8x16([17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [35 39 43 47 51 55 59 63]
+; run: %uwiden_i8x16([2 254 3 253 4 252 5 251 6 250 7 249 8 248 9 247]) == [256 256 256 256 256 256 256 256]
+
+function %swiden_i16x8(i16x8) -> i32x4 {
+block0(v0: i16x8):
+  v1 = swiden_low v0
+  v2 = swiden_high v0
+  v3 = iadd_pairwise v1, v2
+  return v3
+}
+; run: %swiden_i16x8([1 2 3 4 5 6 7 8]) == [3 7 11 15]
+; run: %swiden_i16x8([32767 1 32766 3 32765 5 32764 8]) == [32768 32769 32770 32772]
+; run: %swiden_i16x8([-32768 -1 32766 3 32765 5 -32764 -8]) == [-32769 32769 32770 -32772]
+
+function %uwiden_i16x8(i16x8) -> i32x4 {
+block0(v0: i16x8):
+  v1 = uwiden_low v0
+  v2 = uwiden_high v0
+  v3 = iadd_pairwise v1, v2
+  return v3
+}
+; run: %uwiden_i16x8([100 99 98 97 96 95 94 93]) == [199 195 191 187]
+; run: %uwiden_i16x8([65535 1 65534 3 65533 5 65532 8]) == [65536 65537 65538 65540]