Port Shuffle to ISLE (AArch64) (#4596)

* Port `Shuffle` to ISLE (AArch64)

Ported the existing implementation of `Shuffle` for AArch64 to ISLE.

Copyright (c) 2022 Arm Limited

* Cleanup by shadowing `rn`, `rn2`, and `_`

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-08-04 16:43:23 +01:00
committed by GitHub
parent 70ce288dc7
commit 12a9705fbc
4 changed files with 53 additions and 34 deletions

View File

@@ -1428,6 +1428,9 @@
;; Externally implemented constructor: builds a `Reg` from a `u8`
;; (presumably the register number -- confirm against the Rust implementation).
(decl xreg (u8) Reg)
(extern constructor xreg xreg)
;; Externally implemented constructor: builds a `WritableReg` from a `u8`
;; index; the Rust side forwards the index to a function of the same name.
(decl writable_vreg (u8) WritableReg)
(extern constructor writable_vreg writable_vreg)
;; Externally implemented constructor: builds a `WritableReg` from a `u8`
;; index; the Rust side forwards the index to a function of the same name.
(decl writable_xreg (u8) WritableReg)
(extern constructor writable_xreg writable_xreg)
@@ -1647,6 +1650,26 @@
(_ Unit (emit (MInst.VecTbl dst rn rm is_extension))))
dst))
;; Helper for emitting `MInst.VecTbl2` instructions.
;;
;; A two-register table lookup needs its table held in consecutive registers.
;; That constraint is met by copying the two table inputs into the hardcoded
;; registers v30 and v31 before the lookup itself is emitted.
;;
;; Both table inputs are first forced into virtual registers, because it is
;; not guaranteed that they can be moved safely into the temporaries if
;; either of them is in a real register.
(decl vec_tbl2 (Reg Reg Reg bool Type) Reg)
(rule (vec_tbl2 rn rn2 rm is_extension ty)
      (let (
        ;; The fixed, consecutive table registers.
        (tbl_first WritableReg (writable_vreg 30))
        (tbl_second WritableReg (writable_vreg 31))
        ;; Destination of the lookup.
        (result WritableReg (temp_writable_reg $I8X16))
        ;; Shadow the inputs with values that are in virtual registers.
        (rn Reg (ensure_in_vreg rn ty))
        (rn2 Reg (ensure_in_vreg rn2 ty))
        ;; Fill the table registers, then perform the lookup.
        (_ Unit (emit (MInst.FpuMove128 tbl_first rn)))
        (_ Unit (emit (MInst.FpuMove128 tbl_second rn2)))
        (_ Unit (emit (MInst.VecTbl2 result tbl_first tbl_second rm is_extension)))
      )
      result))
;; Helper for emitting `MInst.VecRRRLong` instructions.
(decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg)
(rule (vec_rrr_long op src1 src2 high_half)
@@ -2325,6 +2348,11 @@
(if-let addr_reg (amode_is_reg addr))
addr_reg)
;; Lower a constant f128.
;; Takes the constant's bits as a `u128` and yields a `Reg`.
(decl constant_f128 (u128) Reg)
;; TODO: Port lower_constant_f128() to ISLE.
;; Until then the constructor is implemented externally.
(extern constructor constant_f128 constant_f128)
;; Lower a vector splat with a constant parameter.
(decl splat_const (u64 VectorSize) Reg)
;; TODO: Port lower_splat_const() to ISLE.

View File

@@ -99,6 +99,12 @@
(add_with_flags_paired $I64 x_lo y_lo)
(adc_paired $I64 x_hi y_hi))))
;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask))))
      ;; Put the immediate mask into a register via `constant_f128`, then use
      ;; it as the index operand of a two-register table lookup over both
      ;; inputs, with `is_extension` set to `$false`.
      (let ((mask_vec Reg (constant_f128 mask)))
        (vec_tbl2 rn rn2 mask_vec $false ty)))
;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type vec_i128_ty (swizzle rn rm)))

View File

@@ -5,11 +5,11 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
insn_inputs, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget,
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
NZCV,
insn_inputs, lower_constant_f128, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm,
ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI,
FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel,
MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize,
ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
};
use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
use crate::isa::aarch64::settings::Flags as IsaFlags;
@@ -22,7 +22,7 @@ use crate::{
TrapCode, Value, ValueList,
},
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
isa::aarch64::lower::{writable_xreg, xreg},
isa::aarch64::lower::{writable_vreg, writable_xreg, xreg},
isa::unwind::UnwindInst,
machinst::{ty_bits, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData},
};
@@ -275,6 +275,10 @@ where
writable_xreg(index)
}
/// Returns the writable register for `index`.
///
/// Thin wrapper that forwards to the free function `writable_vreg` (imported
/// from `crate::isa::aarch64::lower`).
fn writable_vreg(&mut self, index: u8) -> WritableReg {
writable_vreg(index)
}
fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> {
let (val, extend) =
super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?;
@@ -468,6 +472,14 @@ where
address.is_reg()
}
/// Lowers the 128-bit constant `value` into a temporary register and returns
/// that register.
///
/// The actual lowering is done by `lower_constant_f128`; this method only
/// supplies the destination and converts it to a plain `Reg` afterwards.
fn constant_f128(&mut self, value: u128) -> Reg {
    // Destination obtained from `temp_writable_reg` with type `I8X16`.
    let dst = self.temp_writable_reg(I8X16);
    lower_constant_f128(self.lower_ctx, dst, value);
    dst.to_reg()
}
fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
let rd = self.temp_writable_reg(I8X16);

View File

@@ -1074,34 +1074,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
Opcode::Shuffle => {
let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes");
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
// 2 register table vector lookups require consecutive table registers;
// we satisfy this constraint by hardcoding the usage of v29 and v30.
let temp = writable_vreg(29);
let temp2 = writable_vreg(30);
let input_ty = ctx.input_ty(insn, 0);
assert_eq!(input_ty, ctx.input_ty(insn, 1));
// Make sure that both inputs are in virtual registers, since it is
// not guaranteed that we can get them safely to the temporaries if
// either is in a real register.
let rn = ctx.ensure_in_vreg(rn, input_ty);
let rn2 = ctx.ensure_in_vreg(rn2, input_ty);
lower_constant_f128(ctx, rd, mask);
ctx.emit(Inst::gen_move(temp, rn, input_ty));
ctx.emit(Inst::gen_move(temp2, rn2, input_ty));
ctx.emit(Inst::VecTbl2 {
rd,
rn: temp.to_reg(),
rn2: temp2.to_reg(),
rm: rd.to_reg(),
is_extension: false,
});
}
Opcode::Shuffle => implemented_in_isle(ctx),
Opcode::Swizzle => implemented_in_isle(ctx),