aarch64: Implement ireduce/breduce in ISLE (#4331)

* aarch64: Implement `ireduce`/`breduce` in ISLE
* cranelift: Remove vector versions of `breduce`/`ireduce`
2022-06-30 19:15:47 +01:00
parent d1446f767d
commit 919604b8c5
3 changed files with 43 additions and 41 deletions
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3273,20 +3273,14 @@ pub(crate) fn define(
    let Bool = &TypeVar::new(
        "Bool",
-        "A scalar or vector boolean type",
+        "A scalar boolean type",
-        TypeSetBuilder::new()
+        TypeSetBuilder::new().bools(Interval::All).build(),
            .bools(Interval::All)
            .simd_lanes(Interval::All)
            .build(),
    );
    let BoolTo = &TypeVar::new(
        "BoolTo",
-        "A smaller boolean type with the same number of lanes",
+        "A smaller boolean type",
-        TypeSetBuilder::new()
+        TypeSetBuilder::new().bools(Interval::All).build(),
            .bools(Interval::All)
            .simd_lanes(Interval::All)
            .build(),
    );
    let x = &Operand::new("x", Bool);
@@ -3296,11 +3290,7 @@ pub(crate) fn define(
        Inst::new(
            "breduce",
            r#"
-        Convert `x` to a smaller boolean type in the platform-defined way.
+        Convert `x` to a smaller boolean type by discarding the most significant bits.
        The result type must have the same number of vector lanes as the input,
        and each lane must not have more bits that the input lanes. If the
        input and output types are the same, this is a no-op.
        "#,
            &formats.unary,
        )
@@ -3308,6 +3298,14 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );
    let Bool = &TypeVar::new(
        "Bool",
        "A scalar or vector boolean type",
        TypeSetBuilder::new()
            .bools(Interval::All)
            .simd_lanes(Interval::All)
            .build(),
    );
    let BoolTo = &TypeVar::new(
        "BoolTo",
        "A larger boolean type with the same number of lanes",
@@ -3385,20 +3383,14 @@ pub(crate) fn define(
    let Int = &TypeVar::new(
        "Int",
-        "A scalar or vector integer type",
+        "A scalar integer type",
-        TypeSetBuilder::new()
+        TypeSetBuilder::new().ints(Interval::All).build(),
            .ints(Interval::All)
            .simd_lanes(Interval::All)
            .build(),
    );
    let IntTo = &TypeVar::new(
        "IntTo",
-        "A smaller integer type with the same number of lanes",
+        "A smaller integer type",
-        TypeSetBuilder::new()
+        TypeSetBuilder::new().ints(Interval::All).build(),
            .ints(Interval::All)
            .simd_lanes(Interval::All)
            .build(),
    );
    let x = &Operand::new("x", Int);
    let a = &Operand::new("a", IntTo);
@@ -3407,15 +3399,10 @@ pub(crate) fn define(
        Inst::new(
            "ireduce",
            r#"
-        Convert `x` to a smaller integer type by dropping high bits.
+        Convert `x` to a smaller integer type by discarding
        the most significant bits.
-        Each lane in `x` is converted to a smaller integer type by discarding
+        This is the same as reducing modulo `2^n`.
        the most significant bits. This is the same as reducing modulo
        `2^n`.
        The result type must have the same number of vector lanes as the input,
        and each lane must not have more bits that the input lanes. If the
        input and output types are the same, this is a no-op.
        "#,
            &formats.unary,
        )
@@ -3855,6 +3842,14 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );
    let Int = &TypeVar::new(
        "Int",
        "A scalar or vector integer type",
        TypeSetBuilder::new()
            .ints(Interval::All)
            .simd_lanes(Interval::All)
            .build(),
    );
    let x = &Operand::new("x", Int);
    let a = &Operand::new("a", FloatTo);
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1193,6 +1193,20 @@
 (rule (lower (has_type (ty_vec128 ty) (vselect c x y)))
        (bsl ty c x y))
 ;;;; Rules for `ireduce` / `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Lower `ireduce` when the result type matches `ty_int_bool_ref_scalar_64`
 ;; (T -> I{64,32,16,8}).
 ;;
 ;; No narrowing work is done here: narrow values are stored with their high
 ;; bits undefined, so the result is simply register 0 of `src`, passed
 ;; through unchanged.
 ;; NOTE(review): for a source held in more than one register, register 0 is
 ;; presumably the least significant part -- confirm.
 (rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (ireduce src)))
    (value_regs_get src 0))
 ;; `breduce` is lowered the same way as `ireduce`: when the result type
 ;; matches `ty_int_bool_ref_scalar_64`, the result is register 0 of `src`,
 ;; passed through unchanged with its high bits left undefined.
 (rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (breduce src)))
       (value_regs_get src 0))
 ;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y))))))
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -423,14 +423,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(Inst::gen_move(rd, rn, ty));
        }
-        Opcode::Breduce | Opcode::Ireduce => {
+        Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx),
            // Smaller integers/booleans are stored with high-order bits
            // undefined, so we can simply do a copy.
            let rn = put_input_in_regs(ctx, inputs[0]).regs()[0];
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let ty = ctx.input_ty(insn, 0);
            ctx.emit(Inst::gen_move(rd, rn, ty));
        }
        Opcode::Bextend | Opcode::Bmask => {
            // Bextend and Bmask both simply sign-extend. This works for: