Convert fma, valltrue & vanytrue to ISLE (AArch64) (#4608)
* Convert `fma`, `valltrue` & `vanytrue` to ISLE (AArch64)
Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `fma`
  - Introduced missing support for `fma` on vector values, as per the docs.
- `valltrue`
- `vanytrue`
Also fixed `fcmp` on scalar values in the interpreter, and enabled
interpreter tests in `simd-fma.clif`.
This introduces the `FMLA` machine instruction.
Copyright (c) 2022 Arm Limited
* Add comments for `Fmla` and `Bsl`
Copyright (c) 2022 Arm Limited
This commit is contained in:
@@ -335,8 +335,10 @@
|
||||
(rn Reg))
|
||||
|
||||
;; 3-op FPU instruction.
|
||||
;; 16-bit scalars require half-precision floating-point support (FEAT_FP16).
|
||||
(FpuRRRR
|
||||
(fpu_op FPUOp3)
|
||||
(size ScalarSize)
|
||||
(rd WritableReg)
|
||||
(rn Reg)
|
||||
(rm Reg)
|
||||
@@ -478,7 +480,7 @@
|
||||
(rd WritableReg)
|
||||
(rn Reg)
|
||||
(idx u8)
|
||||
(size VectorSize))
|
||||
(size ScalarSize))
|
||||
|
||||
;; Signed move from a vector element to a GPR.
|
||||
(MovFromVecSigned
|
||||
@@ -1011,8 +1013,7 @@
|
||||
;; A floating-point unit (FPU) operation with three args.
|
||||
(type FPUOp3
|
||||
(enum
|
||||
(MAdd32)
|
||||
(MAdd64)
|
||||
(MAdd)
|
||||
))
|
||||
|
||||
;; A conversion from an FP to an integer value.
|
||||
@@ -1108,6 +1109,8 @@
|
||||
;; Bitwise exclusive or
|
||||
(Eor)
|
||||
;; Bitwise select
|
||||
;; This opcode should only be used with the `vec_rrr_inplace`
|
||||
;; constructor.
|
||||
(Bsl)
|
||||
;; Unsigned maximum pairwise
|
||||
(Umaxp)
|
||||
@@ -1143,6 +1146,10 @@
|
||||
(Fmin)
|
||||
;; Floating-point multiply
|
||||
(Fmul)
|
||||
;; Floating-point fused multiply-add vectors
|
||||
;; This opcode should only be used with the `vec_rrr_inplace`
|
||||
;; constructor.
|
||||
(Fmla)
|
||||
;; Add pairwise
|
||||
(Addp)
|
||||
;; Zip vectors (primary) [meaning, high halves]
|
||||
@@ -1364,6 +1371,9 @@
|
||||
(decl imm12_from_negated_u64 (Imm12) u64)
|
||||
(extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
|
||||
|
||||
;; Pure constructor that builds a `ShiftOpAndAmt` from a `Type` and a `u64`;
;; the implementation is provided externally under the same name.
;; NOTE(review): `cmp_rr_shift` calls this through `if-let`, so presumably it
;; may fail to produce a value for some amounts — confirm against the
;; external implementation.
(decl pure lshr_from_u64 (Type u64) ShiftOpAndAmt)
|
||||
(extern constructor lshr_from_u64 lshr_from_u64)
|
||||
|
||||
;; Pure constructor that builds a `ShiftOpAndAmt` from a `Type` and an
;; `Imm64`; the implementation is provided externally under the same name.
;; NOTE(review): by its name this produces a logical-shift-left operand —
;; confirm against the external implementation.
(decl pure lshl_from_imm64 (Type Imm64) ShiftOpAndAmt)
|
||||
(extern constructor lshl_from_imm64 lshl_from_imm64)
|
||||
|
||||
@@ -1494,6 +1504,15 @@
|
||||
(rule (fpu_rr op src size)
|
||||
(let ((dst WritableReg (temp_writable_reg $F64))
|
||||
(_ Unit (emit (MInst.FpuRR op size dst src))))
|
||||
dst))
|
||||
|
||||
;; Emit a `MInst.VecRRR` whose destination register is also one of its
;; inputs (the form required by opcodes such as `Bsl` and `Fmla`).
;;
;; The first register argument is copied into a fresh temporary with
;; `MInst.FpuMove128`; the `VecRRR` then operates on that temporary together
;; with the remaining two registers, and the temporary is returned.
(decl vec_rrr_inplace (VecALUOp Reg Reg Reg VectorSize) Reg)
(rule (vec_rrr_inplace alu_op acc rn rm lane_size)
      (let ((result WritableReg (temp_writable_reg $I8X16))
            ;; Seed the in/out register with the value to be updated.
            (_copy Unit (emit (MInst.FpuMove128 result acc)))
            (_op Unit (emit (MInst.VecRRR alu_op result rn rm lane_size))))
        result))
|
||||
|
||||
;; Helper for emitting `MInst.FpuRRR` instructions.
|
||||
@@ -1503,6 +1522,13 @@
|
||||
(_ Unit (emit (MInst.FpuRRR op size dst src1 src2))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.FpuRRRR` instructions.
;;
;; Arguments follow the declaration order: the three-operand FPU opcode, the
;; scalar size, then the three source registers. A fresh temporary is
;; allocated as the destination and returned.
(decl fpu_rrrr (FPUOp3 ScalarSize Reg Reg Reg) Reg)
(rule (fpu_rrrr op size src1 src2 src3)
      (let ((dst WritableReg (temp_writable_reg $F64))
            (_ Unit (emit (MInst.FpuRRRR op size dst src1 src2 src3))))
        dst))
|
||||
|
||||
;; Helper for emitting `MInst.FpuCmp` instructions.
|
||||
(decl fpu_cmp (ScalarSize Reg Reg) ProducesFlags)
|
||||
(rule (fpu_cmp size rn rm)
|
||||
@@ -1544,6 +1570,15 @@
|
||||
(_ Unit (emit (MInst.AluRRRShift op (operand_size ty) dst src1 src2 shift))))
|
||||
dst))
|
||||
|
||||
;; Flag-setting compare of one register against a second register that is
;; shifted right by the given amount.
;;
;; Emitted as a `SubS` whose destination is the zero register, so only the
;; flags are kept. The rule applies only when `lshr_from_u64` yields a shift
;; operand for the requested amount.
(decl cmp_rr_shift (OperandSize Reg Reg u64) ProducesFlags)
(rule (cmp_rr_shift op_size lhs rhs amt)
      (if-let shift_op (lshr_from_u64 $I64 amt))
      (ProducesFlags.ProducesFlagsSideEffect
        (MInst.AluRRRShift (ALUOp.SubS)
                           op_size
                           (writable_zero_reg)
                           lhs
                           rhs
                           shift_op)))
|
||||
|
||||
;; Helper for emitting `MInst.AluRRRExtend` instructions.
|
||||
(decl alu_rrr_extend (ALUOp Type Reg Reg ExtendOp) Reg)
|
||||
(rule (alu_rrr_extend op ty src1 src2 extend)
|
||||
@@ -1764,7 +1799,7 @@
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.MovFromVec` instructions.
|
||||
(decl mov_from_vec (Reg u8 VectorSize) Reg)
|
||||
(decl mov_from_vec (Reg u8 ScalarSize) Reg)
|
||||
(rule (mov_from_vec rn idx size)
|
||||
(let ((dst WritableReg (temp_writable_reg $I64))
|
||||
(_ Unit (emit (MInst.MovFromVec dst rn idx size))))
|
||||
@@ -1840,6 +1875,22 @@
|
||||
(MInst.CSNeg dst cond if_true if_false)
|
||||
dst)))
|
||||
|
||||
;; Helper for generating `MInst.CCmpImm` instructions.
;;
;; The `u8` argument selects how the condition is turned into a value after
;; the conditional compare: the literal `1` uses `CSet`, the fallback rule
;; uses `CSetm`. Both rules place the result in a fresh `$I64` temporary.
;; NOTE(review): the fallback pattern also matches `1`; this relies on ISLE
;; preferring the more specific literal rule — confirm.
(decl ccmp_imm (OperandSize u8 Reg UImm5 NZCV Cond) ConsumesFlags)

;; Literal `1`: materialize the condition with `CSet`.
(rule (ccmp_imm op_size 1 lhs uimm flags cc)
      (let ((result WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
          (MInst.CCmpImm op_size lhs uimm flags cc)
          (MInst.CSet result cc)
          (value_reg result))))

;; Fallback: materialize the condition with `CSetm`.
(rule (ccmp_imm op_size _bits lhs uimm flags cc)
      (let ((result WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
          (MInst.CCmpImm op_size lhs uimm flags cc)
          (MInst.CSetm result cc)
          (value_reg result))))
|
||||
|
||||
;; Helpers for generating `add` instructions.
|
||||
|
||||
(decl add (Type Reg Reg) Reg)
|
||||
|
||||
Reference in New Issue
Block a user