[AArch64] Port min/max to ISLE (#4374)

2022-07-05 17:16:45 +01:00
parent e91f493ff5
commit d9e0e6a6a9
6 changed files with 511 additions and 28 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -925,6 +925,7 @@
 (rule (vector_size (multi_lane 8 16)) (VectorSize.Size8x16))
 (rule (vector_size (multi_lane 16 4)) (VectorSize.Size16x4))
 (rule (vector_size (multi_lane 16 8)) (VectorSize.Size16x8))
+(rule (vector_size (multi_lane 32 2)) (VectorSize.Size32x2))
 (rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
 (rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))

--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -3576,6 +3576,18 @@ fn test_aarch64_binemit() {
        "sshl v8.2d, v22.2d, v2.2d",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umin,
+            rd: writable_vreg(0),
+            rn: vreg(11),
+            rm: vreg(2),
+            size: VectorSize::Size8x8,
+        },
+        "606D222E",
+        "umin v0.8b, v11.8b, v2.8b",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
@@ -3588,6 +3600,18 @@ fn test_aarch64_binemit() {
        "umin v1.16b, v12.16b, v3.16b",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umin,
+            rd: writable_vreg(29),
+            rn: vreg(19),
+            rm: vreg(9),
+            size: VectorSize::Size16x4,
+        },
+        "7D6E692E",
+        "umin v29.4h, v19.4h, v9.4h",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
@@ -3600,6 +3624,18 @@ fn test_aarch64_binemit() {
        "umin v30.8h, v20.8h, v10.8h",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umin,
+            rd: writable_vreg(7),
+            rn: vreg(21),
+            rm: vreg(20),
+            size: VectorSize::Size32x2,
+        },
+        "A76EB42E",
+        "umin v7.2s, v21.2s, v20.2s",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umin,
@@ -3612,6 +3648,18 @@ fn test_aarch64_binemit() {
        "umin v8.4s, v22.4s, v21.4s",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Smin,
+            rd: writable_vreg(2),
+            rn: vreg(13),
+            rm: vreg(4),
+            size: VectorSize::Size8x8,
+        },
+        "A26D240E",
+        "smin v2.8b, v13.8b, v4.8b",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
@@ -3624,6 +3672,18 @@ fn test_aarch64_binemit() {
        "smin v1.16b, v12.16b, v3.16b",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Smin,
+            rd: writable_vreg(3),
+            rn: vreg(2),
+            rm: vreg(1),
+            size: VectorSize::Size16x4,
+        },
+        "436C610E",
+        "smin v3.4h, v2.4h, v1.4h",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
@@ -3636,6 +3696,18 @@ fn test_aarch64_binemit() {
        "smin v30.8h, v20.8h, v10.8h",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Smin,
+            rd: writable_vreg(9),
+            rn: vreg(22),
+            rm: vreg(20),
+            size: VectorSize::Size32x2,
+        },
+        "C96EB40E",
+        "smin v9.2s, v22.2s, v20.2s",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smin,
@@ -3660,6 +3732,30 @@ fn test_aarch64_binemit() {
        "umax v6.8b, v9.8b, v8.8b",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umax,
+            rd: writable_vreg(5),
+            rn: vreg(15),
+            rm: vreg(8),
+            size: VectorSize::Size8x16,
+        },
+        "E565286E",
+        "umax v5.16b, v15.16b, v8.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umax,
+            rd: writable_vreg(12),
+            rn: vreg(14),
+            rm: vreg(3),
+            size: VectorSize::Size16x4,
+        },
+        "CC65632E",
+        "umax v12.4h, v14.4h, v3.4h",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umax,
@@ -3672,6 +3768,18 @@ fn test_aarch64_binemit() {
        "umax v11.8h, v13.8h, v2.8h",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umax,
+            rd: writable_vreg(9),
+            rn: vreg(13),
+            rm: vreg(15),
+            size: VectorSize::Size32x2,
+        },
+        "A965AF2E",
+        "umax v9.2s, v13.2s, v15.2s",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Umax,
@@ -3684,6 +3792,18 @@ fn test_aarch64_binemit() {
        "umax v8.4s, v12.4s, v14.4s",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Smax,
+            rd: writable_vreg(7),
+            rn: vreg(8),
+            rm: vreg(9),
+            size: VectorSize::Size8x8,
+        },
+        "0765290E",
+        "smax v7.8b, v8.8b, v9.8b",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
@@ -3696,6 +3816,18 @@ fn test_aarch64_binemit() {
        "smax v6.16b, v9.16b, v8.16b",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Smax,
+            rd: writable_vreg(11),
+            rn: vreg(12),
+            rm: vreg(13),
+            size: VectorSize::Size16x4,
+        },
+        "8B656D0E",
+        "smax v11.4h, v12.4h, v13.4h",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
@@ -3708,6 +3840,18 @@ fn test_aarch64_binemit() {
        "smax v11.8h, v13.8h, v2.8h",
    ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Smax,
+            rd: writable_vreg(14),
+            rn: vreg(16),
+            rm: vreg(18),
+            size: VectorSize::Size32x2,
+        },
+        "0E66B20E",
+        "smax v14.2s, v16.2s, v18.2s",
+    ));
+
    insns.push((
        Inst::VecRRR {
            alu_op: VecALUOp::Smax,
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -499,6 +499,20 @@
            (result Reg (msub $I64 div y64 x64)))
        result))

+;;;; Rules for integer min/max: umin, imin, umax, imax ;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (not_i64x2) (imin x y)))
+      (vec_rrr (VecALUOp.Smin) x y (vector_size ty)))
+
+(rule (lower (has_type ty @ (not_i64x2) (umin x y)))
+      (vec_rrr (VecALUOp.Umin) x y (vector_size ty)))
+
+(rule (lower (has_type ty @ (not_i64x2) (imax x y)))
+      (vec_rrr (VecALUOp.Smax) x y (vector_size ty)))
+
+(rule (lower (has_type ty @ (not_i64x2) (umax x y)))
+      (vec_rrr (VecALUOp.Umax) x y (vector_size ty)))
+
 ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; General rule for extending input to an output which fits in a single
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1245,34 +1245,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(Inst::gen_move(dst.regs()[1], src_hi, I64));
        }

-        Opcode::Imax | Opcode::Umax | Opcode::Umin | Opcode::Imin => {
-            let ty = ty.unwrap();
-
-            if !ty.is_vector() || ty.lane_bits() == 64 {
-                return Err(CodegenError::Unsupported(format!(
-                    "{}: Unsupported type: {:?}",
-                    op, ty
-                )));
-            }
-
-            let alu_op = match op {
-                Opcode::Umin => VecALUOp::Umin,
-                Opcode::Imin => VecALUOp::Smin,
-                Opcode::Umax => VecALUOp::Umax,
-                Opcode::Imax => VecALUOp::Smax,
-                _ => unreachable!(),
-            };
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            ctx.emit(Inst::VecRRR {
-                alu_op,
-                rd,
-                rn,
-                rm,
-                size: VectorSize::from_ty(ty),
-            });
-        }
+        Opcode::Imax | Opcode::Umax | Opcode::Umin | Opcode::Imin => implemented_in_isle(ctx),

        Opcode::IaddPairwise => implemented_in_isle(ctx),