aarch64: Implement ireduce/breduce in ISLE (#4331)

* aarch64: Implement `ireduce`/`breduce` in ISLE

* cranelift: Remove vector versions of `breduce`/`ireduce`
This commit is contained in:
Afonso Bordado
2022-06-30 19:15:47 +01:00
committed by GitHub
parent d1446f767d
commit 919604b8c5
3 changed files with 43 additions and 41 deletions

View File

@@ -3273,20 +3273,14 @@ pub(crate) fn define(
let Bool = &TypeVar::new( let Bool = &TypeVar::new(
"Bool", "Bool",
"A scalar or vector boolean type", "A scalar boolean type",
TypeSetBuilder::new() TypeSetBuilder::new().bools(Interval::All).build(),
.bools(Interval::All)
.simd_lanes(Interval::All)
.build(),
); );
let BoolTo = &TypeVar::new( let BoolTo = &TypeVar::new(
"BoolTo", "BoolTo",
"A smaller boolean type with the same number of lanes", "A smaller boolean type",
TypeSetBuilder::new() TypeSetBuilder::new().bools(Interval::All).build(),
.bools(Interval::All)
.simd_lanes(Interval::All)
.build(),
); );
let x = &Operand::new("x", Bool); let x = &Operand::new("x", Bool);
@@ -3296,11 +3290,7 @@ pub(crate) fn define(
Inst::new( Inst::new(
"breduce", "breduce",
r#" r#"
Convert `x` to a smaller boolean type in the platform-defined way. Convert `x` to a smaller boolean type by discarding the most significant bits.
The result type must have the same number of vector lanes as the input,
and each lane must not have more bits than the input lanes. If the
input and output types are the same, this is a no-op.
"#, "#,
&formats.unary, &formats.unary,
) )
@@ -3308,6 +3298,14 @@ pub(crate) fn define(
.operands_out(vec![a]), .operands_out(vec![a]),
); );
let Bool = &TypeVar::new(
"Bool",
"A scalar or vector boolean type",
TypeSetBuilder::new()
.bools(Interval::All)
.simd_lanes(Interval::All)
.build(),
);
let BoolTo = &TypeVar::new( let BoolTo = &TypeVar::new(
"BoolTo", "BoolTo",
"A larger boolean type with the same number of lanes", "A larger boolean type with the same number of lanes",
@@ -3385,20 +3383,14 @@ pub(crate) fn define(
let Int = &TypeVar::new( let Int = &TypeVar::new(
"Int", "Int",
"A scalar or vector integer type", "A scalar integer type",
TypeSetBuilder::new() TypeSetBuilder::new().ints(Interval::All).build(),
.ints(Interval::All)
.simd_lanes(Interval::All)
.build(),
); );
let IntTo = &TypeVar::new( let IntTo = &TypeVar::new(
"IntTo", "IntTo",
"A smaller integer type with the same number of lanes", "A smaller integer type",
TypeSetBuilder::new() TypeSetBuilder::new().ints(Interval::All).build(),
.ints(Interval::All)
.simd_lanes(Interval::All)
.build(),
); );
let x = &Operand::new("x", Int); let x = &Operand::new("x", Int);
let a = &Operand::new("a", IntTo); let a = &Operand::new("a", IntTo);
@@ -3407,15 +3399,10 @@ pub(crate) fn define(
Inst::new( Inst::new(
"ireduce", "ireduce",
r#" r#"
Convert `x` to a smaller integer type by dropping high bits. Convert `x` to a smaller integer type by discarding
the most significant bits.
Each lane in `x` is converted to a smaller integer type by discarding This is the same as reducing modulo `2^n`.
the most significant bits. This is the same as reducing modulo
`2^n`.
The result type must have the same number of vector lanes as the input,
and each lane must not have more bits than the input lanes. If the
input and output types are the same, this is a no-op.
"#, "#,
&formats.unary, &formats.unary,
) )
@@ -3855,6 +3842,14 @@ pub(crate) fn define(
.operands_out(vec![a]), .operands_out(vec![a]),
); );
let Int = &TypeVar::new(
"Int",
"A scalar or vector integer type",
TypeSetBuilder::new()
.ints(Interval::All)
.simd_lanes(Interval::All)
.build(),
);
let x = &Operand::new("x", Int); let x = &Operand::new("x", Int);
let a = &Operand::new("a", FloatTo); let a = &Operand::new("a", FloatTo);

View File

@@ -1193,6 +1193,20 @@
(rule (lower (has_type (ty_vec128 ty) (vselect c x y))) (rule (lower (has_type (ty_vec128 ty) (vselect c x y)))
(bsl ty c x y)) (bsl ty c x y))
;;;; Rules for `ireduce` / `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; T -> I{64,32,16,8}: We can simply pass through the value: values
;; are always stored with high bits undefined, so we can just leave
;; them be.
(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (ireduce src)))
(value_regs_get src 0))
;; Likewise for breduce.
(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (breduce src)))
(value_regs_get src 0))
;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y)))))) (rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y))))))

View File

@@ -423,14 +423,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::gen_move(rd, rn, ty));
} }
Opcode::Breduce | Opcode::Ireduce => { Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx),
// Smaller integers/booleans are stored with high-order bits
// undefined, so we can simply do a copy.
let rn = put_input_in_regs(ctx, inputs[0]).regs()[0];
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ctx.input_ty(insn, 0);
ctx.emit(Inst::gen_move(rd, rn, ty));
}
Opcode::Bextend | Opcode::Bmask => { Opcode::Bextend | Opcode::Bmask => {
// Bextend and Bmask both simply sign-extend. This works for: // Bextend and Bmask both simply sign-extend. This works for: