[AArch64] Port SIMD narrowing to ISLE (#4478)

* [AArch64] Port SIMD narrowing to ISLE

Ports the lowering of fvdemote, snarrow, unarrow and uunarrow.

Also refactor the aarch64 instructions descriptions to parameterize
on ScalarSize instead of using different opcodes.

The zero_value pure constructor has been introduced and used by the
integer narrow operations and it replaces, and extends, the compare
zero patterns.

Copyright (c) 2022, Arm Limited.

* use short 'if' patterns
This commit is contained in:
Sam Parker
2022-07-25 20:40:36 +01:00
committed by GitHub
parent dd40bf075a
commit c5ddb4b803
15 changed files with 1340 additions and 337 deletions

View File

@@ -531,7 +531,8 @@
(op VecRRNarrowOp)
(rd WritableReg)
(rn Reg)
(high_half bool))
(high_half bool)
(lane_size ScalarSize))
;; 1-operand vector instruction that operates on a pair of elements.
(VecRRPair
@@ -905,6 +906,17 @@
(rule (scalar_size $F32) (ScalarSize.Size32))
(rule (scalar_size $F64) (ScalarSize.Size64))
;; Helper for calculating the `ScalarSize` lane type from vector type.
;;
;; Only the first field of the `multi_lane` / `dynamic_lane` pattern
;; (8, 16, 32 or 64) is inspected, and it selects the `ScalarSize` variant with
;; the same number; the second field is ignored via `_`. No rule is given for
;; any other value, so such types do not match this helper.
(decl lane_size (Type) ScalarSize)
;; Types matched through `multi_lane`.
(rule (lane_size (multi_lane 8 _)) (ScalarSize.Size8))
(rule (lane_size (multi_lane 16 _)) (ScalarSize.Size16))
(rule (lane_size (multi_lane 32 _)) (ScalarSize.Size32))
(rule (lane_size (multi_lane 64 _)) (ScalarSize.Size64))
;; Types matched through `dynamic_lane`; same mapping as above.
(rule (lane_size (dynamic_lane 8 _)) (ScalarSize.Size8))
(rule (lane_size (dynamic_lane 16 _)) (ScalarSize.Size16))
(rule (lane_size (dynamic_lane 32 _)) (ScalarSize.Size32))
(rule (lane_size (dynamic_lane 64 _)) (ScalarSize.Size64))
(type Cond extern
(enum
(Eq)
@@ -936,17 +948,6 @@
(Size64x2)
))
;; Enum of dynamic vector sizes, declared `extern` (its definition is supplied
;; from outside the ISLE source).
;; NOTE(review): variant names look like `Size<lane bits>x<lane count>xN`,
;; presumably with `N` standing for a runtime scale factor -- confirm against
;; the external definition.
(type DynamicVectorSize extern
(enum
(Size8x8xN)
(Size8x16xN)
(Size16x4xN)
(Size16x8xN)
(Size32x2xN)
(Size32x4xN)
(Size64x2xN)
))
;; Helper for calculating the `VectorSize` corresponding to a type
(decl vector_size (Type) VectorSize)
(rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
@@ -1203,34 +1204,16 @@
;; A vector narrowing operation with one argument.
;;
;; NOTE(review): this listing contains both size-suffixed variants (`Xtn16`,
;; `Sqxtn32`, ...) and size-agnostic ones (`Xtn`, `Sqxtn`, ...). The commit
;; description states that instructions are now parameterized on `ScalarSize`
;; instead of using different opcodes, so the suffixed variants are presumably
;; the pre-change form -- confirm against the diff markers.
(type VecRRNarrowOp
(enum
;; Extract narrow, 16-bit elements
(Xtn16)
;; Extract narrow, 32-bit elements
(Xtn32)
;; Extract narrow, 64-bit elements
(Xtn64)
;; Signed saturating extract narrow, 16-bit elements
(Sqxtn16)
;; Signed saturating extract narrow, 32-bit elements
(Sqxtn32)
;; Signed saturating extract narrow, 64-bit elements
(Sqxtn64)
;; Signed saturating extract unsigned narrow, 16-bit elements
(Sqxtun16)
;; Signed saturating extract unsigned narrow, 32-bit elements
(Sqxtun32)
;; Signed saturating extract unsigned narrow, 64-bit elements
(Sqxtun64)
;; Unsigned saturating extract narrow, 16-bit elements
(Uqxtn16)
;; Unsigned saturating extract narrow, 32-bit elements
(Uqxtn32)
;; Unsigned saturating extract narrow, 64-bit elements
(Uqxtn64)
;; Floating-point convert to lower precision narrow, 32-bit elements
(Fcvtn32)
;; Floating-point convert to lower precision narrow, 64-bit elements
(Fcvtn64)
;; Extract narrow.
(Xtn)
;; Signed saturating extract narrow.
(Sqxtn)
;; Signed saturating extract unsigned narrow.
(Sqxtun)
;; Unsigned saturating extract narrow.
(Uqxtn)
;; Floating-point convert to lower precision narrow.
(Fcvtn)
))
(type VecRRRLongOp
@@ -1623,10 +1606,19 @@
dst))
;; Helper for emitting `MInst.VecRRNarrow` instructions.
;;
;; The result is a fresh `$I8X16` temporary that the emitted instruction
;; writes to.
;;
;; NOTE(review): two signatures appear in this span -- one taking a `bool`
;; (`high_half`) and one taking a `ScalarSize` (`size`). The `bool` form is
;; presumably the pre-change text of this hunk; confirm against the diff
;; markers.
(decl vec_rr_narrow (VecRRNarrowOp Reg bool) Reg)
(rule (vec_rr_narrow op src high_half)
(decl vec_rr_narrow (VecRRNarrowOp Reg ScalarSize) Reg)
(rule (vec_rr_narrow op src size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecRRNarrow op dst src high_half))))
;; The `ScalarSize` form always passes `$false` for the high-half flag.
(_ Unit (emit (MInst.VecRRNarrow op dst src $false size))))
dst))
;; Emits an `MInst.VecRRNarrow` with its high-half flag set to `$true`, i.e.
;; the form that updates the high half of the destination register.
;;
;; A fresh `$I8X16` temporary is first initialised from the second argument
;; with `MInst.FpuMove128`; the narrowing instruction then uses that same
;; temporary as its destination and the third argument as its input. The
;; temporary is returned.
(decl vec_rr_narrow_high (VecRRNarrowOp Reg Reg ScalarSize) Reg)
(rule (vec_rr_narrow_high narrow_op init wide lane)
      (let ((result WritableReg (temp_writable_reg $I8X16))
            (_copy Unit (emit (MInst.FpuMove128 result init)))
            (_narrow Unit (emit (MInst.VecRRNarrow narrow_op result wide $true lane))))
        result))
;; Helper for emitting `MInst.VecRRLong` instructions.
@@ -1673,6 +1665,14 @@
(_2 Unit (emit (MInst.MovToVec dst src2 lane size))))
dst))
;; Emits an `MInst.VecMovElement` into a fresh `$I8X16` temporary.
;;
;; The temporary is first initialised from the first register argument with
;; `MInst.FpuMove128`; the element move then targets that temporary, reading
;; from the second register argument with the given destination index, source
;; index and vector size. The temporary is returned.
(decl mov_vec_elem (Reg Reg u8 u8 VectorSize) Reg)
(rule (mov_vec_elem base elem_src to_idx from_idx vec_size)
      (let ((result WritableReg (temp_writable_reg $I8X16))
            (_copy Unit (emit (MInst.FpuMove128 result base)))
            (_move Unit (emit (MInst.VecMovElement result elem_src to_idx from_idx vec_size))))
        result))
;; Helper for emitting `MInst.MovFromVec` instructions.
(decl mov_from_vec (Reg u8 VectorSize) Reg)
(rule (mov_from_vec rn idx size)
@@ -1830,9 +1830,37 @@
;; Helper for generating `rev64` instructions: forwards the register and
;; vector size to `vec_misc` with the `VecMisc2.Rev64` operation.
(decl rev64 (Reg VectorSize) Reg)
(rule (rev64 src vec_size)
      (vec_misc (VecMisc2.Rev64) src vec_size))
;; Helper for generating `xtn64` instructions: forwards the register and the
;; boolean flag to `vec_rr_narrow` with the `VecRRNarrowOp.Xtn64` operation.
(decl xtn64 (Reg bool) Reg)
(rule (xtn64 src upper)
      (vec_rr_narrow (VecRRNarrowOp.Xtn64) src upper))
;; Helper for generating `xtn` instructions: `VecRRNarrowOp.Xtn` emitted
;; through `vec_rr_narrow` with the given lane size.
(decl xtn (Reg ScalarSize) Reg)
(rule (xtn src lane)
      (vec_rr_narrow (VecRRNarrowOp.Xtn) src lane))
;; Helper for generating `fcvtn` instructions: `VecRRNarrowOp.Fcvtn` emitted
;; through `vec_rr_narrow` with the given lane size.
(decl fcvtn (Reg ScalarSize) Reg)
(rule (fcvtn src lane)
      (vec_rr_narrow (VecRRNarrowOp.Fcvtn) src lane))
;; Helper for generating `sqxtn` instructions: `VecRRNarrowOp.Sqxtn` emitted
;; through `vec_rr_narrow` with the given lane size.
(decl sqxtn (Reg ScalarSize) Reg)
(rule (sqxtn src lane)
      (vec_rr_narrow (VecRRNarrowOp.Sqxtn) src lane))
;; Helper for generating `sqxtn2` instructions: `VecRRNarrowOp.Sqxtn` emitted
;; through `vec_rr_narrow_high`. The first register is the one that helper
;; copies into the destination before narrowing; the second is the register
;; being narrowed.
(decl sqxtn2 (Reg Reg ScalarSize) Reg)
(rule (sqxtn2 init wide lane)
      (vec_rr_narrow_high (VecRRNarrowOp.Sqxtn) init wide lane))
;; Helper for generating `sqxtun` instructions: `VecRRNarrowOp.Sqxtun` emitted
;; through `vec_rr_narrow` with the given lane size.
(decl sqxtun (Reg ScalarSize) Reg)
(rule (sqxtun src lane)
      (vec_rr_narrow (VecRRNarrowOp.Sqxtun) src lane))
;; Helper for generating `sqxtun2` instructions: `VecRRNarrowOp.Sqxtun` emitted
;; through `vec_rr_narrow_high`. The first register is the one that helper
;; copies into the destination before narrowing; the second is the register
;; being narrowed.
(decl sqxtun2 (Reg Reg ScalarSize) Reg)
(rule (sqxtun2 init wide lane)
      (vec_rr_narrow_high (VecRRNarrowOp.Sqxtun) init wide lane))
;; Helper for generating `uqxtn` instructions: `VecRRNarrowOp.Uqxtn` emitted
;; through `vec_rr_narrow` with the given lane size.
(decl uqxtn (Reg ScalarSize) Reg)
(rule (uqxtn src lane)
      (vec_rr_narrow (VecRRNarrowOp.Uqxtn) src lane))
;; Helper for generating `uqxtn2` instructions: `VecRRNarrowOp.Uqxtn` emitted
;; through `vec_rr_narrow_high`. The first register is the one that helper
;; copies into the destination before narrowing; the second is the register
;; being narrowed.
(decl uqxtn2 (Reg Reg ScalarSize) Reg)
(rule (uqxtn2 init wide lane)
      (vec_rr_narrow_high (VecRRNarrowOp.Uqxtn) init wide lane))
;; Helper for generating `addp` instructions.
(decl addp (Reg Reg VectorSize) Reg)
@@ -2202,16 +2230,6 @@
(alu_rrr op ty x_lo y_lo)
(alu_rrr op ty x_hi y_hi))))
;; Float vector compare helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Match 32 bit float 0 value
;; Declared over `Ieee32` and bound as an extractor to the external
;; `zero_value_f32` function, which supplies the actual matching logic.
(decl zero_value_f32 (Ieee32) Ieee32)
(extern extractor zero_value_f32 zero_value_f32)
;; Match 64 bit float 0 value
;; Declared over `Ieee64` and bound as an extractor to the external
;; `zero_value_f64` function, which supplies the actual matching logic.
(decl zero_value_f64 (Ieee64) Ieee64)
(extern extractor zero_value_f64 zero_value_f64)
;; Generate comparison to zero operator from input condition code
;; Takes a `FloatCC` and yields a `VecMisc2`; bound as a constructor to the
;; external function of the same name, so the mapping itself is not visible
;; here.
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
@@ -2242,12 +2260,6 @@
(rule (fcmeq0 rn size)
(vec_misc (VecMisc2.Fcmeq0) rn size))
;; Int vector compare helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Match integer 0 value
;; Declared over `Imm64` and bound as an extractor to the external
;; `zero_value` function, which supplies the actual matching logic.
(decl zero_value (Imm64) Imm64)
(extern extractor zero_value zero_value)
;; Generate comparison to zero operator from input condition code
;; Takes an `IntCC` and yields a `VecMisc2`; bound as a constructor to the
;; external function of the same name, so the mapping itself is not visible
;; here.
(decl int_cc_cmp_zero_to_vec_misc_op (IntCC) VecMisc2)
(extern constructor int_cc_cmp_zero_to_vec_misc_op int_cc_cmp_zero_to_vec_misc_op)