diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index e81868bccc..6130d679c7 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1428,6 +1428,9 @@ (decl xreg (u8) Reg) (extern constructor xreg xreg) +(decl writable_vreg (u8) WritableReg) +(extern constructor writable_vreg writable_vreg) + (decl writable_xreg (u8) WritableReg) (extern constructor writable_xreg writable_xreg) @@ -1647,6 +1650,26 @@ (_ Unit (emit (MInst.VecTbl dst rn rm is_extension)))) dst)) +;; Helper for emitting `MInst.VecTbl2` instructions. +;; - 2 register table vector lookups require consecutive table registers; +;; we satisfy this constraint by hardcoding the usage of v30 and v31. +;; - Make sure that both args are in virtual regs, since it is not guaranteed +;; that we can get them safely to the temporaries if either is in a real +;; register. +(decl vec_tbl2 (Reg Reg Reg bool Type) Reg) +(rule (vec_tbl2 rn rn2 rm is_extension ty) + (let ( + (temp WritableReg (writable_vreg 30)) + (temp2 WritableReg (writable_vreg 31)) + (dst WritableReg (temp_writable_reg $I8X16)) + (rn Reg (ensure_in_vreg rn ty)) + (rn2 Reg (ensure_in_vreg rn2 ty)) + (_ Unit (emit (MInst.FpuMove128 temp rn))) + (_ Unit (emit (MInst.FpuMove128 temp2 rn2))) + (_ Unit (emit (MInst.VecTbl2 dst temp temp2 rm is_extension))) + ) + dst)) + ;; Helper for emitting `MInst.VecRRRLong` instructions. (decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg) (rule (vec_rrr_long op src1 src2 high_half) @@ -2325,6 +2348,11 @@ (if-let addr_reg (amode_is_reg addr)) addr_reg) +;; Lower a constant f128. +(decl constant_f128 (u128) Reg) +;; TODO: Port lower_constant_f128() to ISLE. +(extern constructor constant_f128 constant_f128) + ;; Lower a vector splat with a constant parameter. (decl splat_const (u64 VectorSize) Reg) ;; TODO: Port lower_splat_const() to ISLE. diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 808a7324a5..71050183a4 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -99,6 +99,12 @@ (add_with_flags_paired $I64 x_lo y_lo) (adc_paired $I64 x_hi y_hi)))) +;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask)))) + (let ((mask_reg Reg (constant_f128 mask))) + (vec_tbl2 rn rn2 mask_reg $false ty))) + ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type vec_i128_ty (swizzle rn rm))) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 7d23b9c311..8293a0bec7 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -5,11 +5,11 @@ pub mod generated_code; // Types that the generated ISLE code uses via `use super::*`. use super::{ - insn_inputs, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, - CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, - Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, - Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, - NZCV, + insn_inputs, lower_constant_f128, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, + ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, + FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, MachLabel, + MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg, ScalarSize, + ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV, }; use crate::isa::aarch64::lower::{lower_address, lower_splat_const}; use crate::isa::aarch64::settings::Flags as IsaFlags; @@ -22,7 +22,7 @@ use crate::{ TrapCode, Value, ValueList, }, isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm}, - isa::aarch64::lower::{writable_xreg, xreg}, + isa::aarch64::lower::{writable_vreg, writable_xreg, xreg}, isa::unwind::UnwindInst, machinst::{ty_bits, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData}, }; @@ -275,6 +275,10 @@ where writable_xreg(index) } + fn writable_vreg(&mut self, index: u8) -> WritableReg { + writable_vreg(index) + } + fn extended_value_from_value(&mut self, val: Value) -> Option { let (val, extend) = super::get_as_extended_value(self.lower_ctx, val, NarrowValueMode::None)?; @@ -468,6 +472,14 @@ where address.is_reg() } + fn constant_f128(&mut self, value: u128) -> Reg { + let rd = self.temp_writable_reg(I8X16); + + lower_constant_f128(self.lower_ctx, rd, value); + + rd.to_reg() + } + fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg { let rd = self.temp_writable_reg(I8X16); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 842342d5b9..4c6774cff5 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1074,34 +1074,7 @@ pub(crate) fn lower_insn_to_regs>( } } - Opcode::Shuffle => { - let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes"); - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - // 2 register table vector lookups require consecutive table registers; - // we satisfy this constraint by hardcoding the usage of v29 and v30. - let temp = writable_vreg(29); - let temp2 = writable_vreg(30); - let input_ty = ctx.input_ty(insn, 0); - assert_eq!(input_ty, ctx.input_ty(insn, 1)); - // Make sure that both inputs are in virtual registers, since it is - // not guaranteed that we can get them safely to the temporaries if - // either is in a real register. - let rn = ctx.ensure_in_vreg(rn, input_ty); - let rn2 = ctx.ensure_in_vreg(rn2, input_ty); - - lower_constant_f128(ctx, rd, mask); - ctx.emit(Inst::gen_move(temp, rn, input_ty)); - ctx.emit(Inst::gen_move(temp2, rn2, input_ty)); - ctx.emit(Inst::VecTbl2 { - rd, - rn: temp.to_reg(), - rn2: temp2.to_reg(), - rm: rd.to_reg(), - is_extension: false, - }); - } + Opcode::Shuffle => implemented_in_isle(ctx), Opcode::Swizzle => implemented_in_isle(ctx),