x64: Lower fcvt_from_uint in ISLE (#4684)
* Add a test for the existing behavior of `fcvt_from_uint`
* Migrate the I8, I16, I32 cases of `fcvt_from_uint`
* Implement the I64 case of `fcvt_from_uint`
* Add a test for the existing behavior of `fcvt_from_uint.f64x2`
* Migrate `fcvt_from_uint.f64x2` to ISLE
* Lower the last case of `fcvt_from_uint`
* Add a test for `fcvt_from_uint`
* Finish lowering `fcvt_from_uint`
* Format
This commit is contained in:
18
cranelift/filetests/filetests/isa/x64/fcvt-simd.clif
Normal file
18
cranelift/filetests/filetests/isa/x64/fcvt-simd.clif
Normal file
@@ -0,0 +1,18 @@
|
||||
test compile precise-output
|
||||
set enable_simd
|
||||
target x86_64 has_avx512vl has_avx512f
|
||||
|
||||
function %f1(i32x4) -> f32x4 {
|
||||
block0(v0: i32x4):
|
||||
v1 = fcvt_from_uint.f32x4 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; vcvtudq2ps %xmm0, %xmm0
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
@@ -131,3 +131,72 @@ block0(v0: i32x4):
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
function %f10(i8, i16, i32, i64) -> f32 {
|
||||
block0(v0: i8, v1: i16, v2: i32, v3: i64):
|
||||
v4 = fcvt_from_uint.f32 v0
|
||||
v5 = fcvt_from_uint.f32 v1
|
||||
v6 = fcvt_from_uint.f32 v2
|
||||
v7 = fcvt_from_uint.f32 v3
|
||||
v8 = fadd.f32 v4, v5
|
||||
v9 = fadd.f32 v8, v6
|
||||
v10 = fadd.f32 v9, v7
|
||||
return v10
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; movzbq %dil, %rax
|
||||
; cvtsi2ss %rax, %xmm0
|
||||
; movzwq %si, %rax
|
||||
; cvtsi2ss %rax, %xmm6
|
||||
; movl %edx, %eax
|
||||
; cvtsi2ss %rax, %xmm7
|
||||
; u64_to_f32_seq %rcx, %xmm4, %r8, %rdx
|
||||
; addss %xmm0, %xmm6, %xmm0
|
||||
; addss %xmm0, %xmm7, %xmm0
|
||||
; addss %xmm0, %xmm4, %xmm0
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
function %f11(i32x4) -> f64x2 {
|
||||
block0(v0: i32x4):
|
||||
v1 = uwiden_low v0
|
||||
v2 = fcvt_from_uint.f64x2 v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; load_const VCodeConstant(0), %xmm3
|
||||
; unpcklps %xmm0, %xmm3, %xmm0
|
||||
; load_const VCodeConstant(1), %xmm7
|
||||
; subpd %xmm0, %xmm7, %xmm0
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
function %f12(i32x4) -> f32x4 {
|
||||
block0(v0: i32x4):
|
||||
v1 = fcvt_from_uint.f32x4 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; movdqa %xmm0, %xmm4
|
||||
; pslld %xmm4, $16, %xmm4
|
||||
; psrld %xmm4, $16, %xmm4
|
||||
; psubd %xmm0, %xmm4, %xmm0
|
||||
; cvtdq2ps %xmm4, %xmm9
|
||||
; psrld %xmm0, $1, %xmm0
|
||||
; cvtdq2ps %xmm0, %xmm0
|
||||
; addps %xmm0, %xmm0, %xmm0
|
||||
; addps %xmm0, %xmm9, %xmm0
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
|
||||
Reference in New Issue
Block a user