Port Shuffle to ISLE (AArch64) (#4596)

* Port `Shuffle` to ISLE (AArch64)

Ported the existing implementation of `Shuffle` for AArch64 to ISLE.

Copyright (c) 2022 Arm Limited

* Cleanup by shadowing `rn`, `rn2`, and `_`

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-08-04 16:43:23 +01:00
committed by GitHub
parent 70ce288dc7
commit 12a9705fbc
4 changed files with 53 additions and 34 deletions

View File

@@ -1428,6 +1428,9 @@
(decl xreg (u8) Reg) (decl xreg (u8) Reg)
(extern constructor xreg xreg) (extern constructor xreg xreg)
(decl writable_vreg (u8) WritableReg)
(extern constructor writable_vreg writable_vreg)
(decl writable_xreg (u8) WritableReg) (decl writable_xreg (u8) WritableReg)
(extern constructor writable_xreg writable_xreg) (extern constructor writable_xreg writable_xreg)
@@ -1647,6 +1650,26 @@
(_ Unit (emit (MInst.VecTbl dst rn rm is_extension)))) (_ Unit (emit (MInst.VecTbl dst rn rm is_extension))))
dst)) dst))
;; Helper for emitting `MInst.VecTbl2` instructions.
;; - 2 register table vector lookups require consecutive table registers;
;; we satisfy this constraint by hardcoding the usage of v30 and v31.
;; - Make sure that both args are in virtual regs, since it is not guaranteed
;; that we can get them safely to the temporaries if either is in a real
;; register.
;;
;; Arguments: `rn` and `rn2` are the two table operands; `rm` and
;; `is_extension` are forwarded unchanged to `MInst.VecTbl2`; `ty` is the type
;; handed to `ensure_in_vreg` for both table operands.
;; Result: a fresh `$I8X16` temporary used as the destination of the lookup.
(decl vec_tbl2 (Reg Reg Reg bool Type) Reg)
(rule (vec_tbl2 rn rn2 rm is_extension ty)
(let (
;; Fixed registers that will hold the two table operands.
(temp WritableReg (writable_vreg 30))
(temp2 WritableReg (writable_vreg 31))
;; Destination of the table lookup; this is what the helper returns.
(dst WritableReg (temp_writable_reg $I8X16))
;; Shadow `rn` and `rn2` with the results of `ensure_in_vreg`.
(rn Reg (ensure_in_vreg rn ty))
(rn2 Reg (ensure_in_vreg rn2 ty))
;; Move both table operands into the fixed registers, then emit the lookup.
(_ Unit (emit (MInst.FpuMove128 temp rn)))
(_ Unit (emit (MInst.FpuMove128 temp2 rn2)))
(_ Unit (emit (MInst.VecTbl2 dst temp temp2 rm is_extension)))
)
dst))
;; Helper for emitting `MInst.VecRRRLong` instructions. ;; Helper for emitting `MInst.VecRRRLong` instructions.
(decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg) (decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg)
(rule (vec_rrr_long op src1 src2 high_half) (rule (vec_rrr_long op src1 src2 high_half)
@@ -2325,6 +2348,11 @@
(if-let addr_reg (amode_is_reg addr)) (if-let addr_reg (amode_is_reg addr))
addr_reg) addr_reg)
;; Lower a constant f128.
;; Takes the constant's bits as a `u128` and yields a `Reg`; the
;; implementation is supplied externally under the same name.
(decl constant_f128 (u128) Reg)
;; TODO: Port lower_constant_f128() to ISLE.
(extern constructor constant_f128 constant_f128)
;; Lower a vector splat with a constant parameter. ;; Lower a vector splat with a constant parameter.
(decl splat_const (u64 VectorSize) Reg) (decl splat_const (u64 VectorSize) Reg)
;; TODO: Port lower_splat_const() to ISLE. ;; TODO: Port lower_splat_const() to ISLE.

View File

@@ -99,6 +99,12 @@
(add_with_flags_paired $I64 x_lo y_lo) (add_with_flags_paired $I64 x_lo y_lo)
(adc_paired $I64 x_hi y_hi)))) (adc_paired $I64 x_hi y_hi))))
;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The immediate mask is extracted as a `u128` and placed in a register with
;; `constant_f128`; that register, both inputs and the result type `ty` are
;; then handed to `vec_tbl2` with `is_extension` set to `$false`.
(rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask))))
(let ((mask_reg Reg (constant_f128 mask)))
(vec_tbl2 rn rn2 mask_reg $false ty)))
;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type vec_i128_ty (swizzle rn rm))) (rule (lower (has_type vec_i128_ty (swizzle rn rm)))

View File

@@ -5,11 +5,11 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`. // Types that the generated ISLE code uses via `use super::*`.
use super::{ use super::{
insn_inputs, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, insn_inputs, lower_constant_f128, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm,
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel,
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize,
NZCV, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
}; };
use crate::isa::aarch64::lower::{lower_address, lower_splat_const}; use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
use crate::isa::aarch64::settings::Flags as IsaFlags; use crate::isa::aarch64::settings::Flags as IsaFlags;
@@ -22,7 +22,7 @@ use crate::{
TrapCode, Value, ValueList, TrapCode, Value, ValueList,
}, },
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm}, isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
isa::aarch64::lower::{writable_xreg, xreg}, isa::aarch64::lower::{writable_vreg, writable_xreg, xreg},
isa::unwind::UnwindInst, isa::unwind::UnwindInst,
machinst::{ty_bits, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData}, machinst::{ty_bits, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData},
}; };
@@ -275,6 +275,10 @@ where
writable_xreg(index) writable_xreg(index)
} }
/// Returns the `WritableReg` that the free function `writable_vreg` yields
/// for `index`.
///
/// The unqualified call in the body resolves to that free function rather
/// than to this method, because a method call would need a `self.` receiver.
fn writable_vreg(&mut self, index: u8) -> WritableReg {
writable_vreg(index)
}
fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> { fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> {
let (val, extend) = let (val, extend) =
super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?; super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?;
@@ -468,6 +472,14 @@ where
address.is_reg() address.is_reg()
} }
/// Allocates a fresh `I8X16` temporary, passes it together with `value` to
/// `lower_constant_f128`, and returns the temporary as a plain `Reg`.
fn constant_f128(&mut self, value: u128) -> Reg {
    let dst = self.temp_writable_reg(I8X16);
    lower_constant_f128(self.lower_ctx, dst, value);
    dst.to_reg()
}
fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg { fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
let rd = self.temp_writable_reg(I8X16); let rd = self.temp_writable_reg(I8X16);

View File

@@ -1074,34 +1074,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
} }
Opcode::Shuffle => { Opcode::Shuffle => implemented_in_isle(ctx),
let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes");
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
// 2 register table vector lookups require consecutive table registers;
// we satisfy this constraint by hardcoding the usage of v29 and v30.
let temp = writable_vreg(29);
let temp2 = writable_vreg(30);
let input_ty = ctx.input_ty(insn, 0);
assert_eq!(input_ty, ctx.input_ty(insn, 1));
// Make sure that both inputs are in virtual registers, since it is
// not guaranteed that we can get them safely to the temporaries if
// either is in a real register.
let rn = ctx.ensure_in_vreg(rn, input_ty);
let rn2 = ctx.ensure_in_vreg(rn2, input_ty);
lower_constant_f128(ctx, rd, mask);
ctx.emit(Inst::gen_move(temp, rn, input_ty));
ctx.emit(Inst::gen_move(temp2, rn2, input_ty));
ctx.emit(Inst::VecTbl2 {
rd,
rn: temp.to_reg(),
rn2: temp2.to_reg(),
rm: rd.to_reg(),
is_extension: false,
});
}
Opcode::Swizzle => implemented_in_isle(ctx), Opcode::Swizzle => implemented_in_isle(ctx),