aarch64: Implement ireduce/breduce in ISLE (#4331)

* aarch64: Implement `ireduce`/`breduce` in ISLE
* cranelift: Remove vector versions of `breduce`/`ireduce`
2022-06-30 19:15:47 +01:00
parent d1446f767d
commit 919604b8c5
3 changed files with 43 additions and 41 deletions
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3273,20 +3273,14 @@ pub(crate) fn define(

    let Bool = &TypeVar::new(
        "Bool",
-        "A scalar or vector boolean type",
-        TypeSetBuilder::new()
-            .bools(Interval::All)
-            .simd_lanes(Interval::All)
-            .build(),
+        "A scalar boolean type",
+        TypeSetBuilder::new().bools(Interval::All).build(),
    );

    let BoolTo = &TypeVar::new(
        "BoolTo",
-        "A smaller boolean type with the same number of lanes",
-        TypeSetBuilder::new()
-            .bools(Interval::All)
-            .simd_lanes(Interval::All)
-            .build(),
+        "A smaller boolean type",
+        TypeSetBuilder::new().bools(Interval::All).build(),
    );

    let x = &Operand::new("x", Bool);
@@ -3296,11 +3290,7 @@ pub(crate) fn define(
        Inst::new(
            "breduce",
            r#"
-        Convert `x` to a smaller boolean type in the platform-defined way.
-
-        The result type must have the same number of vector lanes as the input,
-        and each lane must not have more bits that the input lanes. If the
-        input and output types are the same, this is a no-op.
+        Convert `x` to a smaller boolean type by discarding the most significant bits.
        "#,
            &formats.unary,
        )
@@ -3308,6 +3298,14 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

+    let Bool = &TypeVar::new(
+        "Bool",
+        "A scalar or vector boolean type",
+        TypeSetBuilder::new()
+            .bools(Interval::All)
+            .simd_lanes(Interval::All)
+            .build(),
+    );
    let BoolTo = &TypeVar::new(
        "BoolTo",
        "A larger boolean type with the same number of lanes",
@@ -3385,20 +3383,14 @@ pub(crate) fn define(

    let Int = &TypeVar::new(
        "Int",
-        "A scalar or vector integer type",
-        TypeSetBuilder::new()
-            .ints(Interval::All)
-            .simd_lanes(Interval::All)
-            .build(),
+        "A scalar integer type",
+        TypeSetBuilder::new().ints(Interval::All).build(),
    );

    let IntTo = &TypeVar::new(
        "IntTo",
-        "A smaller integer type with the same number of lanes",
-        TypeSetBuilder::new()
-            .ints(Interval::All)
-            .simd_lanes(Interval::All)
-            .build(),
+        "A smaller integer type",
+        TypeSetBuilder::new().ints(Interval::All).build(),
    );
    let x = &Operand::new("x", Int);
    let a = &Operand::new("a", IntTo);
@@ -3407,15 +3399,10 @@ pub(crate) fn define(
        Inst::new(
            "ireduce",
            r#"
-        Convert `x` to a smaller integer type by dropping high bits.
+        Convert `x` to a smaller integer type by discarding
+        the most significant bits.

-        Each lane in `x` is converted to a smaller integer type by discarding
-        the most significant bits. This is the same as reducing modulo
-        `2^n`.
-
-        The result type must have the same number of vector lanes as the input,
-        and each lane must not have more bits that the input lanes. If the
-        input and output types are the same, this is a no-op.
+        This is the same as reducing modulo `2^n`.
        "#,
            &formats.unary,
        )
@@ -3855,6 +3842,14 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

+    let Int = &TypeVar::new(
+        "Int",
+        "A scalar or vector integer type",
+        TypeSetBuilder::new()
+            .ints(Interval::All)
+            .simd_lanes(Interval::All)
+            .build(),
+    );
    let x = &Operand::new("x", Int);
    let a = &Operand::new("a", FloatTo);

--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1193,6 +1193,20 @@
 (rule (lower (has_type (ty_vec128 ty) (vselect c x y)))
        (bsl ty c x y))

+;;;; Rules for `ireduce` / `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; T -> I{64,32,16,8}: We can simply pass through the value: values
+;; are always stored with high bits undefined, so we can just leave
+;; them be.
+(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (ireduce src)))
+    (value_regs_get src 0))
+
+;; Likewise for breduce.
+
+(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (breduce src)))
+      (value_regs_get src 0))
+
+
 ;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y))))))
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -423,14 +423,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(Inst::gen_move(rd, rn, ty));
        }

-        Opcode::Breduce | Opcode::Ireduce => {
-            // Smaller integers/booleans are stored with high-order bits
-            // undefined, so we can simply do a copy.
-            let rn = put_input_in_regs(ctx, inputs[0]).regs()[0];
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let ty = ctx.input_ty(insn, 0);
-            ctx.emit(Inst::gen_move(rd, rn, ty));
-        }
+        Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx),

        Opcode::Bextend | Opcode::Bmask => {
            // Bextend and Bmask both simply sign-extend. This works for: