Convert fma, valltrue & vanytrue to ISLE (AArch64) (#4608)

* Convert `fma`, `valltrue` & `vanytrue` to ISLE (AArch64)

Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `fma`
  - Introduced missing support for `fma` on vector values, as per the
    docs.
- `valltrue`
- `vanytrue`

Also fixed `fcmp` on scalar values in the interpreter, and enabled
interpreter tests in `simd-fma.clif`.

This introduces the `FMLA` machine instruction.

Copyright (c) 2022 Arm Limited

* Add comments for `Fmla` and `Bsl`

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-08-05 17:47:56 +01:00
committed by GitHub
parent 1ed7b43e62
commit eb332b8369
19 changed files with 608 additions and 206 deletions

View File

@@ -335,8 +335,10 @@
(rn Reg))
;; 3-op FPU instruction.
;; 16-bit scalars require half-precision floating-point support (FEAT_FP16).
(FpuRRRR
(fpu_op FPUOp3)
(size ScalarSize)
(rd WritableReg)
(rn Reg)
(rm Reg)
@@ -478,7 +480,7 @@
(rd WritableReg)
(rn Reg)
(idx u8)
(size VectorSize))
(size ScalarSize))
;; Signed move from a vector element to a GPR.
(MovFromVecSigned
@@ -1011,8 +1013,7 @@
;; A floating-point unit (FPU) operation with three args.
;; This is the type of the `fpu_op` field of `MInst.FpuRRRR` and of the first
;; argument of the `fpu_rrrr` helper.
(type FPUOp3
(enum
;; NOTE(review): `MAdd32`/`MAdd64` are listed here next to `MAdd`. This looks
;; like the removed and added sides of a change shown together -- confirm
;; which variants the enum is meant to declare.
(MAdd32)
(MAdd64)
(MAdd)
))
;; A conversion from an FP to an integer value.
@@ -1108,6 +1109,8 @@
;; Bitwise exclusive or
(Eor)
;; Bitwise select
;; This opcode should only be used with the `vec_rrr_inplace`
;; constructor.
(Bsl)
;; Unsigned maximum pairwise
(Umaxp)
@@ -1143,6 +1146,10 @@
(Fmin)
;; Floating-point multiply
(Fmul)
;; Floating-point fused multiply-add vectors
;; This opcode should only be used with the `vec_rrr_inplace`
;; constructor.
(Fmla)
;; Add pairwise
(Addp)
;; Zip vectors (primary) [meaning, high halves]
@@ -1364,6 +1371,9 @@
;; Externally implemented extractor: matches a `u64` and yields an `Imm12`.
;; NOTE(review): by its name this presumably succeeds when the negated value
;; is encodable as an `Imm12` -- confirm against the extern implementation.
(decl imm12_from_negated_u64 (Imm12) u64)
(extern extractor imm12_from_negated_u64 imm12_from_negated_u64)
;; Externally implemented pure constructor: builds a `ShiftOpAndAmt` from a
;; type and a `u64`.
;; NOTE(review): presumably a logical-shift-right operand with the given
;; amount -- confirm against the extern implementation.
(decl pure lshr_from_u64 (Type u64) ShiftOpAndAmt)
(extern constructor lshr_from_u64 lshr_from_u64)
;; Externally implemented pure constructor: builds a `ShiftOpAndAmt` from a
;; type and an `Imm64`.
;; NOTE(review): presumably a logical-shift-left operand -- confirm.
(decl pure lshl_from_imm64 (Type Imm64) ShiftOpAndAmt)
(extern constructor lshl_from_imm64 lshl_from_imm64)
@@ -1494,6 +1504,15 @@
(rule (fpu_rr fpu_op rn scalar_size)
      ;; Obtain a destination via `temp_writable_reg`, emit the `FpuRR` into
      ;; it, and return that destination as the result of the helper.
      (let ((rd WritableReg (temp_writable_reg $F64))
            (_emitted Unit (emit (MInst.FpuRR fpu_op scalar_size rd rn))))
        rd))
;; Helper for emitting `MInst.VecRRR` instructions whose destination register
;; doubles as one of the three source operands.
;;
;; The first source is moved into the destination with `MInst.FpuMove128`;
;; the `VecRRR` is then emitted with that same register as its destination
;; and the second and third sources as its remaining operands.
(decl vec_rrr_inplace (VecALUOp Reg Reg Reg VectorSize) Reg)
(rule (vec_rrr_inplace alu_op acc rn rm vec_size)
      (let ((rd WritableReg (temp_writable_reg $I8X16))
            (_copy Unit (emit (MInst.FpuMove128 rd acc)))
            (_op Unit (emit (MInst.VecRRR alu_op rd rn rm vec_size))))
        rd))
;; Helper for emitting `MInst.FpuRRR` instructions.
@@ -1503,6 +1522,13 @@
(_ Unit (emit (MInst.FpuRRR op size dst src1 src2))))
dst))
;; Helper for emitting `MInst.FpuRRRR` instructions.
;;
;; Arguments, in `decl` order: the `FPUOp3` operation, the `ScalarSize`, and
;; the three source registers. The operation and size are forwarded to
;; `MInst.FpuRRRR` in that same order (its `fpu_op` field, then its `size`
;; field). The result is written to a destination obtained from
;; `temp_writable_reg`, which is returned.
(decl fpu_rrrr (FPUOp3 ScalarSize Reg Reg Reg) Reg)
(rule (fpu_rrrr op size src1 src2 src3)
      (let ((dst WritableReg (temp_writable_reg $F64))
            (_ Unit (emit (MInst.FpuRRRR op size dst src1 src2 src3))))
        dst))
;; Helper for emitting `MInst.FpuCmp` instructions.
(decl fpu_cmp (ScalarSize Reg Reg) ProducesFlags)
(rule (fpu_cmp size rn rm)
@@ -1544,6 +1570,15 @@
(_ Unit (emit (MInst.AluRRRShift op (operand_size ty) dst src1 src2 shift))))
dst))
;; Helper producing a flag-setting `cmp` of one register against a second
;; register that is shifted right by a constant amount.
;;
;; The compare is expressed as an `ALUOp.SubS` whose destination is the zero
;; register, so it is emitted purely for its effect on the flags. The rule
;; only applies when `lshr_from_u64` yields a shift operand for the amount.
(decl cmp_rr_shift (OperandSize Reg Reg u64) ProducesFlags)
(rule (cmp_rr_shift op_size rn rm amount)
      (if-let shifted (lshr_from_u64 $I64 amount))
      (ProducesFlags.ProducesFlagsSideEffect
        (MInst.AluRRRShift
          (ALUOp.SubS) op_size (writable_zero_reg) rn rm shifted)))
;; Helper for emitting `MInst.AluRRRExtend` instructions.
(decl alu_rrr_extend (ALUOp Type Reg Reg ExtendOp) Reg)
(rule (alu_rrr_extend op ty src1 src2 extend)
@@ -1764,7 +1799,7 @@
dst))
;; Helper for emitting `MInst.MovFromVec` instructions.
(decl mov_from_vec (Reg u8 VectorSize) Reg)
(decl mov_from_vec (Reg u8 ScalarSize) Reg)
(rule (mov_from_vec rn idx size)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.MovFromVec dst rn idx size))))
@@ -1840,6 +1875,22 @@
(MInst.CSNeg dst cond if_true if_false)
dst)))
;; Helper for generating `MInst.CCmpImm` instructions.
;;
;; Both rules pair the `CCmpImm` with a second flag-consuming instruction that
;; writes a register obtained from `temp_writable_reg`; that register is the
;; returned value. The rules differ only in that second instruction, which is
;; selected by the `u8` argument.
(decl ccmp_imm (OperandSize u8 Reg UImm5 NZCV Cond) ConsumesFlags)

;; `u8` argument equal to 1: follow the compare with `CSet`.
(rule (ccmp_imm op_size 1 lhs rhs_imm flags cc)
      (let ((rd WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
          (MInst.CCmpImm op_size lhs rhs_imm flags cc)
          (MInst.CSet rd cc)
          (value_reg rd))))

;; Remaining values of the `u8` argument: follow the compare with `CSetm`.
;; NOTE(review): this pattern also matches 1, so it relies on the literal
;; rule above taking precedence -- confirm rule priority.
(rule (ccmp_imm op_size _ty_bits lhs rhs_imm flags cc)
      (let ((rd WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
          (MInst.CCmpImm op_size lhs rhs_imm flags cc)
          (MInst.CSetm rd cc)
          (value_reg rd))))
;; Helpers for generating `add` instructions.
(decl add (Type Reg Reg) Reg)