Port Shuffle to ISLE (AArch64) (#4596)

* Port `Shuffle` to ISLE (AArch64)

  Ported the existing implementation of `Shuffle` for AArch64 to ISLE.

  Copyright (c) 2022 Arm Limited

* Cleanup by shadowing `rn`, `rn2`, and `_`

  Copyright (c) 2022 Arm Limited
2022-08-04 16:43:23 +01:00
parent 70ce288dc7
commit 12a9705fbc
4 changed files with 53 additions and 34 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1428,6 +1428,9 @@
 (decl xreg (u8) Reg)
 (extern constructor xreg xreg)

+(decl writable_vreg (u8) WritableReg)
+(extern constructor writable_vreg writable_vreg)
+
 (decl writable_xreg (u8) WritableReg)
 (extern constructor writable_xreg writable_xreg)

@@ -1647,6 +1650,26 @@
            (_ Unit (emit (MInst.VecTbl dst rn rm is_extension))))
        dst))

+;; Helper for emitting `MInst.VecTbl2` instructions.
+;; - Two-register table vector lookups require consecutive table registers;
+;;   we satisfy this constraint by hardcoding the usage of v30 and v31.
+;; - Make sure that both args are in virtual regs, since it is not guaranteed
+;;   that we can get them safely to the temporaries if either is in a real
+;;   register.
+(decl vec_tbl2 (Reg Reg Reg bool Type) Reg)
+(rule (vec_tbl2 rn rn2 rm is_extension ty)
+      (let (
+            (temp WritableReg (writable_vreg 30))
+            (temp2 WritableReg (writable_vreg 31))
+            (dst WritableReg (temp_writable_reg $I8X16))
+            (rn Reg (ensure_in_vreg rn ty))
+            (rn2 Reg (ensure_in_vreg rn2 ty))
+            (_ Unit (emit (MInst.FpuMove128 temp rn)))
+            (_ Unit (emit (MInst.FpuMove128 temp2 rn2)))
+            (_ Unit (emit (MInst.VecTbl2 dst temp temp2 rm is_extension)))
+        )
+        dst))
+
 ;; Helper for emitting `MInst.VecRRRLong` instructions.
 (decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg)
 (rule (vec_rrr_long op src1 src2 high_half)
@@ -2325,6 +2348,11 @@
      (if-let addr_reg (amode_is_reg addr))
      addr_reg)

+;; Lower a constant f128.
+(decl constant_f128 (u128) Reg)
+;; TODO: Port lower_constant_f128() to ISLE.
+(extern constructor constant_f128 constant_f128)
+
 ;; Lower a vector splat with a constant parameter.
 (decl splat_const (u64 VectorSize) Reg)
 ;; TODO: Port lower_splat_const() to ISLE.
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -99,6 +99,12 @@
          (add_with_flags_paired $I64 x_lo y_lo)
          (adc_paired $I64 x_hi y_hi))))

+;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask))))
+      (let ((mask_reg Reg (constant_f128 mask)))
+       (vec_tbl2 rn rn2 mask_reg $false ty)))
+
 ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type vec_i128_ty (swizzle rn rm)))
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -5,11 +5,11 @@ pub mod generated_code;

 // Types that the generated ISLE code uses via `use super::*`.
 use super::{
-    insn_inputs, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget,
-    CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
-    Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
-    Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
-    NZCV,
+    insn_inputs, lower_constant_f128, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm,
+    ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI,
+    FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel,
+    MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize,
+    ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
 };
 use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
 use crate::isa::aarch64::settings::Flags as IsaFlags;
@@ -22,7 +22,7 @@ use crate::{
        TrapCode, Value, ValueList,
    },
    isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
-    isa::aarch64::lower::{writable_xreg, xreg},
+    isa::aarch64::lower::{writable_vreg, writable_xreg, xreg},
    isa::unwind::UnwindInst,
    machinst::{ty_bits, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData},
 };
@@ -275,6 +275,10 @@ where
        writable_xreg(index)
    }

+    fn writable_vreg(&mut self, index: u8) -> WritableReg {
+        writable_vreg(index)
+    }
+
    fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> {
        let (val, extend) =
            super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?;
@@ -468,6 +472,14 @@ where
        address.is_reg()
    }

+    fn constant_f128(&mut self, value: u128) -> Reg {
+        let rd = self.temp_writable_reg(I8X16);
+
+        lower_constant_f128(self.lower_ctx, rd, value);
+
+        rd.to_reg()
+    }
+
    fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
        let rd = self.temp_writable_reg(I8X16);

--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1074,34 +1074,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            }
        }

-        Opcode::Shuffle => {
-            let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes");
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            // 2 register table vector lookups require consecutive table registers;
-            // we satisfy this constraint by hardcoding the usage of v29 and v30.
-            let temp = writable_vreg(29);
-            let temp2 = writable_vreg(30);
-            let input_ty = ctx.input_ty(insn, 0);
-            assert_eq!(input_ty, ctx.input_ty(insn, 1));
-            // Make sure that both inputs are in virtual registers, since it is
-            // not guaranteed that we can get them safely to the temporaries if
-            // either is in a real register.
-            let rn = ctx.ensure_in_vreg(rn, input_ty);
-            let rn2 = ctx.ensure_in_vreg(rn2, input_ty);
-
-            lower_constant_f128(ctx, rd, mask);
-            ctx.emit(Inst::gen_move(temp, rn, input_ty));
-            ctx.emit(Inst::gen_move(temp2, rn2, input_ty));
-            ctx.emit(Inst::VecTbl2 {
-                rd,
-                rn: temp.to_reg(),
-                rn2: temp2.to_reg(),
-                rm: rd.to_reg(),
-                is_extension: false,
-            });
-        }
+        Opcode::Shuffle => implemented_in_isle(ctx),

        Opcode::Swizzle => implemented_in_isle(ctx),