x64: Lower fcvt_from_uint in ISLE (#4684)

* Add a test for the existing behavior of fcvt_from_uint

* Migrate the I8, I16, I32 cases of fcvt_from_uint

* Implement the I64 case of fcvt_from_uint

* Add a test for the existing behavior of fcvt_from_uint.f64x2

* Migrate fcvt_from_uint.f64x2 to ISLE

* Lower the last case of `fcvt_from_uint`

* Add a test for `fcvt_from_uint`

* Finish lowering fcvt_from_uint

* Format
Author: Trevor Elliott
Date: 2022-08-11 12:28:41 -07:00
Committed by: GitHub
Parent: c4fd6a95da
Commit: 0c2e0494bd
8 changed files with 223 additions and 280 deletions


@@ -0,0 +1,18 @@
test compile precise-output
set enable_simd
target x86_64 has_avx512vl has_avx512f
function %f1(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtudq2ps %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
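
The avx512vl/avx512f target above needs only the single vcvtudq2ps: each u32 lane is converted to the nearest f32. A minimal Rust model of the per-lane semantics (illustrative only, not Cranelift code, and it assumes the default round-to-nearest rounding mode):

// Per-lane model of vcvtudq2ps: each u32 lane becomes the nearest f32.
// Rust's `as` cast from u32 to f32 also rounds to nearest.
fn cvtudq2ps_model(lanes: [u32; 4]) -> [f32; 4] {
    lanes.map(|x| x as f32)
}

fn main() {
    // u32::MAX is not representable in f32 and rounds up to 2^32.
    assert_eq!(
        cvtudq2ps_model([0, 1, 0x8000_0000, u32::MAX]),
        [0.0, 1.0, 2147483648.0, 4294967296.0]
    );
}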


@@ -131,3 +131,72 @@ block0(v0: i32x4):
; popq %rbp
; ret
function %f10(i8, i16, i32, i64) -> f32 {
block0(v0: i8, v1: i16, v2: i32, v3: i64):
v4 = fcvt_from_uint.f32 v0
v5 = fcvt_from_uint.f32 v1
v6 = fcvt_from_uint.f32 v2
v7 = fcvt_from_uint.f32 v3
v8 = fadd.f32 v4, v5
v9 = fadd.f32 v8, v6
v10 = fadd.f32 v9, v7
return v10
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbq %dil, %rax
; cvtsi2ss %rax, %xmm0
; movzwq %si, %rax
; cvtsi2ss %rax, %xmm6
; movl %edx, %eax
; cvtsi2ss %rax, %xmm7
; u64_to_f32_seq %rcx, %xmm4, %r8, %rdx
; addss %xmm0, %xmm6, %xmm0
; addss %xmm0, %xmm7, %xmm0
; addss %xmm0, %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
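
In %f10 the i8, i16, and i32 inputs are zero-extended to a 64-bit register (movzbq, movzwq, movl) and then converted with the signed cvtsi2ss; that is safe because the zero-extended value is always non-negative and fits in i64. Only the i64 input needs the u64_to_f32_seq pseudo-instruction, since a 64-bit unsigned value may not fit in the signed range. A small Rust sketch of the zero-extend-then-signed-convert idea for the u32 case (an illustrative model, not the emitted code):

// Model of the narrow cases: widen to i64 first (the value stays
// non-negative), then rely on a signed conversion like cvtsi2ss.
fn u32_to_f32_via_signed(x: u32) -> f32 {
    let wide = x as u64 as i64; // zero-extend; always non-negative in i64
    wide as f32                 // signed int -> float, same value as a direct cast
}

fn main() {
    for x in [0u32, 1, 0x8000_0000, u32::MAX] {
        assert_eq!(u32_to_f32_via_signed(x), x as f32);
    }
}
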
function %f11(i32x4) -> f64x2 {
block0(v0: i32x4):
v1 = uwiden_low v0
v2 = fcvt_from_uint.f64x2 v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; load_const VCodeConstant(0), %xmm3
; unpcklps %xmm0, %xmm3, %xmm0
; load_const VCodeConstant(1), %xmm7
; subpd %xmm0, %xmm7, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
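
%f11 uses the classic bit-manipulation route for u32-to-f64: unpcklps interleaves each u32 lane with the high word of a loaded constant, and subpd then subtracts a second constant so that only the original integer value remains. The precise output does not print the values behind VCodeConstant(0) and VCodeConstant(1); the standard choice for this trick is 2^52 (high word 0x4330_0000), which is assumed in the Rust sketch of the per-lane computation below. Every u32 fits in the 52-bit mantissa, so the result is exact.

// Per-lane model of the unpcklps/subpd lowering: OR the u32 into the
// mantissa of 2^52 (bit pattern 0x4330_0000_0000_0000), giving exactly
// 2^52 + x, then subtract 2^52 to recover x as an f64.
fn u32_to_f64_bit_trick(x: u32) -> f64 {
    let magic = 0x4330_0000_0000_0000u64; // assumed constant: bit pattern of 2^52
    let combined = f64::from_bits(magic | x as u64);
    combined - f64::from_bits(magic)
}

fn main() {
    for x in [0u32, 1, 0x8000_0000, u32::MAX] {
        assert_eq!(u32_to_f64_bit_trick(x), x as f64);
    }
}
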
function %f12(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm0, %xmm4
; pslld %xmm4, $16, %xmm4
; psrld %xmm4, $16, %xmm4
; psubd %xmm0, %xmm4, %xmm0
; cvtdq2ps %xmm4, %xmm9
; psrld %xmm0, $1, %xmm0
; cvtdq2ps %xmm0, %xmm0
; addps %xmm0, %xmm0, %xmm0
; addps %xmm0, %xmm9, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
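
Without AVX-512 there is no packed unsigned-to-float instruction, so %f12 splits each lane: pslld/psrld by 16 isolate the low 16 bits, psubd leaves the high part, the low half converts exactly through the signed cvtdq2ps, and the high part is halved (psrld by 1) so it fits the signed i32 range, converted, doubled, and added back in. A scalar Rust model of the same algorithm (illustrative only; under round-to-nearest the final addps is the only inexact step, so the result matches a direct cast):

// Scalar model of the non-AVX-512 f32x4 lowering shown in %f12.
fn u32_to_f32_split(x: u32) -> f32 {
    let low = x & 0xFFFF;                        // pslld $16 then psrld $16
    let high = x - low;                          // psubd
    let low_f = low as i32 as f32;               // cvtdq2ps: exact, value < 2^16
    let half_high_f = (high >> 1) as i32 as f32; // psrld $1 keeps it in signed range
    (half_high_f + half_high_f) + low_f          // addps twice: re-double, then add the low part
}

fn main() {
    for x in [0u32, 1, 0xFFFF, 0x1_0000, 0x8000_0000, u32::MAX] {
        assert_eq!(u32_to_f32_split(x), x as f32);
    }
}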