Port Shuffle to ISLE (AArch64) (#4596)
* Port `Shuffle` to ISLE (AArch64): ported the existing implementation of `Shuffle` for AArch64 to ISLE. Copyright (c) 2022 Arm Limited. * Clean up by shadowing `rn`, `rn2`, and `_`. Copyright (c) 2022 Arm Limited.
This commit is contained in:
@@ -1428,6 +1428,9 @@
|
||||
(decl xreg (u8) Reg)
|
||||
(extern constructor xreg xreg)
|
||||
|
||||
(decl writable_vreg (u8) WritableReg)
|
||||
(extern constructor writable_vreg writable_vreg)
|
||||
|
||||
(decl writable_xreg (u8) WritableReg)
|
||||
(extern constructor writable_xreg writable_xreg)
|
||||
|
||||
@@ -1647,6 +1650,26 @@
|
||||
(_ Unit (emit (MInst.VecTbl dst rn rm is_extension))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecTbl2` instructions.
|
||||
;; - 2 register table vector lookups require consecutive table registers;
|
||||
;; we satisfy this constraint by hardcoding the usage of v30 and v31.
|
||||
;; - Make sure that both args are in virtual regs, since it is not guaranteed
|
||||
;; that we can get them safely to the temporaries if either is in a real
|
||||
;; register.
|
||||
;;
;; Arguments, in order: the two table inputs (`rn`, `rn2`), the register
;; passed through as the `rm` operand of `VecTbl2`, the `is_extension` flag
;; forwarded unchanged to `VecTbl2`, and the type used when forcing the two
;; table inputs into virtual registers. The result is a fresh `$I8X16`
;; temporary that `VecTbl2` writes.
(decl vec_tbl2 (Reg Reg Reg bool Type) Reg)
|
||||
(rule (vec_tbl2 rn rn2 rm is_extension ty)
|
||||
(let (
|
||||
;; Fixed, consecutive vector registers that will hold the two tables.
(temp WritableReg (writable_vreg 30))
|
||||
(temp2 WritableReg (writable_vreg 31))
|
||||
;; Destination for the lookup result.
(dst WritableReg (temp_writable_reg $I8X16))
|
||||
;; Shadow `rn` / `rn2` with copies that are guaranteed to be in virtual regs.
(rn Reg (ensure_in_vreg rn ty))
|
||||
(rn2 Reg (ensure_in_vreg rn2 ty))
|
||||
;; Move both tables into the fixed registers, then emit the lookup.
(_ Unit (emit (MInst.FpuMove128 temp rn)))
|
||||
(_ Unit (emit (MInst.FpuMove128 temp2 rn2)))
|
||||
(_ Unit (emit (MInst.VecTbl2 dst temp temp2 rm is_extension)))
|
||||
)
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecRRRLong` instructions.
|
||||
(decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg)
|
||||
(rule (vec_rrr_long op src1 src2 high_half)
|
||||
@@ -2325,6 +2348,11 @@
|
||||
(if-let addr_reg (amode_is_reg addr))
|
||||
addr_reg)
|
||||
|
||||
;; Lower a constant f128.
|
||||
(decl constant_f128 (u128) Reg)
|
||||
;; TODO: Port lower_constant_f128() to ISLE.
|
||||
(extern constructor constant_f128 constant_f128)
|
||||
|
||||
;; Lower a vector splat with a constant parameter.
|
||||
(decl splat_const (u64 VectorSize) Reg)
|
||||
;; TODO: Port lower_splat_const() to ISLE.
|
||||
|
||||
@@ -99,6 +99,12 @@
|
||||
(add_with_flags_paired $I64 x_lo y_lo)
|
||||
(adc_paired $I64 x_hi y_hi))))
|
||||
|
||||
;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Lower `shuffle` with an immediate 128-bit mask: materialize the mask into a
;; register with `constant_f128`, then pass it as the `rm` operand of a
;; two-register table lookup (`vec_tbl2`) over `rn` and `rn2`, with
;; `is_extension` set to `$false`.
(rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask))))
|
||||
(let ((mask_reg Reg (constant_f128 mask)))
|
||||
(vec_tbl2 rn rn2 mask_reg $false ty)))
|
||||
|
||||
;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type vec_i128_ty (swizzle rn rm)))
|
||||
|
||||
@@ -5,11 +5,11 @@ pub mod generated_code;
|
||||
|
||||
// Types that the generated ISLE code uses via `use super::*`.
|
||||
use super::{
|
||||
insn_inputs, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget,
|
||||
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
|
||||
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
|
||||
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
|
||||
NZCV,
|
||||
insn_inputs, lower_constant_f128, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm,
|
||||
ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI,
|
||||
FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel,
|
||||
MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize,
|
||||
ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
|
||||
};
|
||||
use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
|
||||
use crate::isa::aarch64::settings::Flags as IsaFlags;
|
||||
@@ -22,7 +22,7 @@ use crate::{
|
||||
TrapCode, Value, ValueList,
|
||||
},
|
||||
isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
|
||||
isa::aarch64::lower::{writable_xreg, xreg},
|
||||
isa::aarch64::lower::{writable_vreg, writable_xreg, xreg},
|
||||
isa::unwind::UnwindInst,
|
||||
machinst::{ty_bits, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData},
|
||||
};
|
||||
@@ -275,6 +275,10 @@ where
|
||||
writable_xreg(index)
|
||||
}
|
||||
|
||||
/// Returns the writable register for vector register number `index`.
///
/// Thin forwarder to the free function `writable_vreg` imported from
/// `isa::aarch64::lower`; `self` is not used.
fn writable_vreg(&mut self, index: u8) -> WritableReg {
|
||||
writable_vreg(index)
|
||||
}
|
||||
|
||||
fn extended_value_from_value(&mut self, val: Value) -> Option<ExtendedValue> {
|
||||
let (val, extend) =
|
||||
super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?;
|
||||
@@ -468,6 +472,14 @@ where
|
||||
address.is_reg()
|
||||
}
|
||||
|
||||
/// Lowers the 128-bit constant `value` and returns the register holding it.
///
/// Allocates a fresh `I8X16` temporary, has `lower_constant_f128` emit the
/// code that fills it, and hands back the read-only view of that temporary.
fn constant_f128(&mut self, value: u128) -> Reg {
|
||||
// Fresh destination register for the constant.
let rd = self.temp_writable_reg(I8X16);
|
||||
|
||||
lower_constant_f128(self.lower_ctx, rd, value);
|
||||
|
||||
rd.to_reg()
|
||||
}
|
||||
|
||||
fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
|
||||
let rd = self.temp_writable_reg(I8X16);
|
||||
|
||||
|
||||
@@ -1074,34 +1074,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Shuffle => {
|
||||
let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes");
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
// 2 register table vector lookups require consecutive table registers;
|
||||
// we satisfy this constraint by hardcoding the usage of v29 and v30.
|
||||
let temp = writable_vreg(29);
|
||||
let temp2 = writable_vreg(30);
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
assert_eq!(input_ty, ctx.input_ty(insn, 1));
|
||||
// Make sure that both inputs are in virtual registers, since it is
|
||||
// not guaranteed that we can get them safely to the temporaries if
|
||||
// either is in a real register.
|
||||
let rn = ctx.ensure_in_vreg(rn, input_ty);
|
||||
let rn2 = ctx.ensure_in_vreg(rn2, input_ty);
|
||||
|
||||
lower_constant_f128(ctx, rd, mask);
|
||||
ctx.emit(Inst::gen_move(temp, rn, input_ty));
|
||||
ctx.emit(Inst::gen_move(temp2, rn2, input_ty));
|
||||
ctx.emit(Inst::VecTbl2 {
|
||||
rd,
|
||||
rn: temp.to_reg(),
|
||||
rn2: temp2.to_reg(),
|
||||
rm: rd.to_reg(),
|
||||
is_extension: false,
|
||||
});
|
||||
}
|
||||
Opcode::Shuffle => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::Swizzle => implemented_in_isle(ctx),
|
||||
|
||||
|
||||
Reference in New Issue
Block a user