x64: Lower fcvt_from_uint in ISLE (#4684)

* Add a test for the existing behavior of fcvt_from_uint

* Migrate the I8, I16, I32 cases of fcvt_from_uint

* Implement the I64 case of fcvt_from_uint

* Add a test for the existing behavior of fcvt_from_uint.f64x2

* Migrate fcvt_from_uint.f64x2 to ISLE

* Lower the last case of `fcvt_from_uint`

* Add a test for `fcvt_from_uint`

* Finish lowering fcvt_from_uint

* Format
This commit is contained in:
Trevor Elliott
2022-08-11 12:28:41 -07:00
committed by GitHub
parent c4fd6a95da
commit 0c2e0494bd
8 changed files with 223 additions and 280 deletions

View File

@@ -2985,3 +2985,76 @@
;; `fcvt_low_from_sint` is lowered to `x64_cvtdq2pd`, which is handed the
;; operand's type together with the operand itself.
(rule (lower (fcvt_low_from_sint src @ (value_type ty)))
      (x64_cvtdq2pd ty src))
;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; $F32 result from an integer source of at most 32 bits: the source is
;; zero-extended into a 64-bit GPR and converted with `cvtsi2ss` using a
;; 64-bit source operand. After zero-extension the value is non-negative when
;; read as a signed 64-bit integer, so the signed conversion yields the
;; unsigned result.
(rule (lower
        (has_type $F32
          (fcvt_from_uint
            src @ (value_type (fits_in_32 (ty_int in_ty))))))
      (x64_cvtsi2ss
        $I64
        (extend_to_gpr src $I64 (ExtendKind.Zero))))
;; $F64 result from an integer source of at most 32 bits: the source is
;; zero-extended into a 64-bit GPR and converted with `cvtsi2sd` using a
;; 64-bit source operand. After zero-extension the value is non-negative when
;; read as a signed 64-bit integer, so the signed conversion yields the
;; unsigned result.
(rule (lower
        (has_type $F64
          (fcvt_from_uint
            src @ (value_type (fits_in_32 (ty_int in_ty))))))
      (x64_cvtsi2sd
        $I64
        (extend_to_gpr src $I64 (ExtendKind.Zero))))
;; A 64-bit integer source is handed off to `cvt_u64_to_float_seq`, which
;; receives the result type and the source value.
(rule (lower
        (has_type out_ty
          (fcvt_from_uint src @ (value_type $I64))))
      (cvt_u64_to_float_seq out_ty src))
;; `fcvt_from_uint` applied to `uwiden_low` of an $I32X4 value, producing
;; $F64X2.
;;
;; `unpcklps` interleaves the source lanes with a constant so that each
;; resulting double is equal to 0x1.0p52 + double(src). 0x1.0p52 is special
;; because at that exponent every value of the mantissa corresponds to exactly
;; one uint32 number. Subtracting 0x1.0p52 afterwards leaves double(src).
(rule (lower
        (has_type $F64X2
          (fcvt_from_uint
            (uwiden_low src @ (value_type $I32X4)))))
      (let (;; constant that is interleaved with the source lanes
            (interleave_const Xmm
              (x64_xmm_load_const $I32X4 (fcvt_uint_mask_const)))
            ;; doubles holding 0x1.0p52 + double(src)
            (biased Xmm (x64_unpcklps src interleave_const))
            ;; constant that is subtracted to remove the 0x1.0p52 bias
            (bias Xmm
              (x64_xmm_load_const $I32X4 (fcvt_uint_mask_high_const))))
        (x64_subpd biased bias)))
;; With both AVX512VL and AVX512F enabled, `fcvt_from_uint` producing $F32X4
;; is lowered to a single `vcvtudq2ps`.
;;
;; NOTE: this rule carries priority 1 so that it wins over the generic $F32X4
;; rule for `fcvt_from_uint`, which matches the same input but does not
;; require either of the AVX512 extensions to be enabled.
(rule 1 (lower
          (has_type
            (and (avx512vl_enabled) (avx512f_enabled) $F32X4)
            (fcvt_from_uint lanes)))
      (x64_vcvtudq2ps lanes))
;; Generic lowering of `fcvt_from_uint` producing $F32X4.
;;
;; Converting packed unsigned integers to packed floats takes several steps
;; here: there is no single-instruction lowering for packed unsigned
;; integers, but there is one for packed signed integers (`cvtdq2ps`). Each
;; 32-bit lane is therefore split into its upper 16 bits and its lower 16
;; bits, and each piece is converted separately with `cvtdq2ps`:
;;
;;  * The lower piece has its upper 16 bits clear, so it is never treated as
;;    a negative number.
;;  * The upper piece may have its most significant bit set, so it is first
;;    shifted right by 1 (halved) to force that bit to 0, then converted, and
;;    finally doubled by adding the result to itself. The low 16 bits of the
;;    upper piece are zero, so the shift discards nothing.
;;
;; The two converted pieces are then added, and that addition rounds
;; correctly.
;;
;; Sequence, shown for one lane:
;;   -> A  = 0xffffffff
;;   -> Ah = 0xffff0000
;;   -> Al = 0x0000ffff
;;   -> Convert(Al)   // convert int to float
;;   -> Ah = Ah >> 1  // shift right by 1 so Ah is not treated as signed
;;   -> Convert(Ah)   // no significant digits were lost by the shift
;;   -> Ah = Ah + Ah  // double Ah to undo the shift done before converting
;;   -> dst = Ah + Al // add the two floats together
(rule (lower (has_type $F32X4 (fcvt_from_uint src)))
      (let (;; isolate the low 16 bits of every lane: shift them to the top,
            ;; then back down so the upper 16 bits become zero
            (shifted_up Xmm (x64_pslld src (RegMemImm.Imm 16)))
            (lo16 Xmm (x64_psrld shifted_up (RegMemImm.Imm 16)))

            ;; what remains after removing the low 16 bits is the high 16 bits
            (hi16 Xmm (x64_psubd src lo16))

            ;; the low piece converts directly
            (lo_f32 Xmm (x64_cvtdq2ps lo16))

            ;; the high piece is halved, converted, and then doubled again to
            ;; recover the correct value
            (hi16_halved Xmm (x64_psrld hi16 (RegMemImm.Imm 1)))
            (hi_halved_f32 Xmm (x64_cvtdq2ps hi16_halved))
            (hi_f32 Xmm (x64_addps hi_halved_f32 hi_halved_f32)))

        ;; the sum of the two converted pieces is the result
        (x64_addps hi_f32 lo_f32)))