Cranelift AArch64: Migrate Bitselect and Vselect to ISLE (#4139)

2022-05-16 17:39:28 +01:00
parent f19d8cc851
commit edf07a8da6
7 changed files with 91 additions and 55 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1809,6 +1809,9 @@

 ;; Helpers for generating `and` instructions.

+(decl and_reg (Type Reg Reg) Reg) ;; Register-register `and`: (and_reg ty x y).
+(rule (and_reg ty x y) (alu_rrr (ALUOp.And) ty x y)) ;; Forwards to `alu_rrr` with `ALUOp.And`.
+
 (decl and_imm (Type Reg ImmLogic) Reg)
 (rule (and_imm ty x y) (alu_rr_imm_logic (ALUOp.And) ty x y))

@@ -1820,6 +1823,10 @@
 (rule (eor_vec x y size) (vec_rrr (VecALUOp.Eor) x y size))

 ;; Helpers for generating `bic` instructions.
+
+(decl bic (Type Reg Reg) Reg) ;; Register-register `bic` taking a Type (contrast `bic_vec`, which takes a VectorSize).
+(rule (bic ty x y) (alu_rrr (ALUOp.AndNot) ty x y)) ;; Forwards to `alu_rrr` with `ALUOp.AndNot`.
+
 (decl bic_vec (Reg Reg VectorSize) Reg)
 (rule (bic_vec x y size) (vec_rrr (VecALUOp.Bic) x y size))

@@ -1864,6 +1871,15 @@
 (decl vec_cnt (Reg VectorSize) Reg)
 (rule (vec_cnt x size) (vec_misc (VecMisc2.Cnt) x size))

+;; Helpers for generating a `bsl` instruction.
+
+(decl bsl (Type Reg Reg Reg) Reg) ;; (bsl ty c x y): yields the register targeted by the emitted `Bsl`.
+(rule (bsl ty c x y)
+      (let ((dst WritableReg (temp_writable_reg ty)) ;; Fresh temporary that is returned as the result.
+            (_1 Unit (emit (MInst.FpuMove128 dst c))) ;; Copy `c` into `dst` before the `Bsl` is emitted.
+            (_2 Unit (emit (MInst.VecRRR (VecALUOp.Bsl) dst x y (vector_size ty))))) ;; NOTE(review): `Bsl` presumably reads `dst` as the selection mask and overwrites it -- confirm against the emitter.
+        dst))
+
 ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (decl imm (Type u64) Reg)
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1142,7 +1142,22 @@
 (rule (lower (has_type $I8X16 (popcnt x)))
      (vec_cnt x (VectorSize.Size8x16)))

-;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (bitselect c x y))) ;; Scalar case: and / bic / orr sequence.
+      (let ((tmp1 Reg (and_reg ty x c)) ;; `and` of `x` with the condition `c`.
+            (tmp2 Reg (bic ty y c))) ;; `bic` of `y` with `c`; presumably y & ~c -- confirm operand order.
+        (orr ty tmp1 tmp2))) ;; `orr` the two partial results together.
+
+(rule (lower (has_type (ty_vec128 ty) (bitselect c x y))) ;; Types matched by `ty_vec128`: delegate to the `bsl` helper.
+        (bsl ty c x y))
+
+;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type (ty_vec128 ty) (vselect c x y))) ;; Lowered exactly like the `ty_vec128` case of `bitselect`.
+        (bsl ty c x y))
+
+;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y))))))
      (let ((rn Reg x)
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -420,55 +420,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            }
        }

-        Opcode::Bitselect | Opcode::Vselect => {
-            let ty = ty.unwrap();
-            if !ty.is_vector() {
-                debug_assert_ne!(Opcode::Vselect, op);
-                let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
-                let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
-                // AND rTmp, rn, rcond
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::And,
-                    size: OperandSize::Size64,
-                    rd: tmp,
-                    rn,
-                    rm: rcond,
-                });
-                // BIC rd, rm, rcond
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::AndNot,
-                    size: OperandSize::Size64,
-                    rd,
-                    rn: rm,
-                    rm: rcond,
-                });
-                // ORR rd, rd, rTmp
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::Orr,
-                    size: OperandSize::Size64,
-                    rd,
-                    rn: rd.to_reg(),
-                    rm: tmp.to_reg(),
-                });
-            } else {
-                let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
-                let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                ctx.emit(Inst::gen_move(rd, rcond, ty));
-
-                ctx.emit(Inst::VecRRR {
-                    alu_op: VecALUOp::Bsl,
-                    rd,
-                    rn,
-                    rm,
-                    size: VectorSize::from_ty(ty),
-                });
-            }
-        }
+        Opcode::Bitselect | Opcode::Vselect => implemented_in_isle(ctx),

        Opcode::Trueif => {
            let condcode = ctx.data(insn).cond_code().unwrap();
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -249,6 +249,15 @@ macro_rules! isle_prelude_methods {
            }
        }

+        #[inline]
+        fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option<Type> { // Returns `Some(ty)` on a match, `None` otherwise.
+            if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() { // At most 64 bits wide, not a float, not a vector.
+                Some(ty) // Match: hand the type back unchanged.
+            } else {
+                None // No match.
+            }
+        }
+
        #[inline]
        fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> {
            if ty.bits() == 32 || ty.bits() == 64 {
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -293,6 +293,11 @@
 (decl fits_in_64 (Type) Type)
 (extern extractor fits_in_64 fits_in_64)

+;; An extractor that only matches scalar booleans, integers, and references that
+;; can fit in 64 bits.
+(decl ty_int_bool_ref_scalar_64 (Type) Type)
+(extern extractor ty_int_bool_ref_scalar_64 ty_int_bool_ref_scalar_64) ;; Bound to the external method of the same name.
+
 ;; An extractor that matches 32- and 64-bit types only.
 (decl ty_32_or_64 (Type) Type)
 (extern extractor ty_32_or_64 ty_32_or_64)