Add x86 legalization for fcvt_from_uint.f32x4

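This legalizes the conversion of an unsigned `i32x4` into an `f32x4` (with rounding where a lane cannot be represented exactly) in one of two ways: with a single AVX512VL/AVX512F instruction, VCVTUDQ2PS, when it is available, or otherwise with a longer sequence of SSE4.1-compatible instructions.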
This commit is contained in:
Andrew Brown
2020-05-26 15:20:30 -07:00
parent 23ed48f269
commit 01d34e71b9
6 changed files with 93 additions and 1 deletions

View File

@@ -0,0 +1,10 @@
; Legalizer expectation for the AVX-512 path: the target line below enables
; `has_avx512f`, and the filecheck directive inside the function expects
; `fcvt_from_uint.f32x4` to be rewritten into a single `x86_vcvtudq2ps` that
; takes the original argument and defines the original result value.
test legalizer
set enable_simd
target x86_64 skylake has_avx512f=true
function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0:i32x4):
v1 = fcvt_from_uint.f32x4 v0
; check: v1 = x86_vcvtudq2ps v0
return v1
}

View File

@@ -0,0 +1,19 @@
; Legalizer expectation for the fallback path: the target line below names no
; extra ISA settings, and the filecheck directives inside the function pin the
; exact multi-instruction expansion of `fcvt_from_uint.f32x4`.
;
; Reading the expected sequence top to bottom:
;   v2..v5   view the input as i16x8, blend it with `const0` using mask 85
;            (0b01010101), and view the result as i32x4 again.
;            NOTE(review): presumably this isolates the low 16 bits of every
;            32-bit lane, with const0 being all zeroes -- const0 is not shown
;            in this file, confirm against the legalization code.
;   v6       subtracts that blended part from the original input.
;   v7       converts the blended part with the signed conversion.
;   v8..v10  shift the v6 remainder right by one, convert it with the signed
;            conversion, then add the converted value to itself.
;            NOTE(review): presumably the shift keeps the remainder out of the
;            sign bit and the self-add undoes the halving -- confirm.
;   v1       adds the two converted parts, redefining the original result.
;
; The value numbers v2..v10 are part of the expected text, so a legalization
; change that allocates values differently will fail these directives.
test legalizer
set enable_simd
target x86_64 skylake
function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0:i32x4):
v1 = fcvt_from_uint.f32x4 v0
; check: v2 = raw_bitcast.i16x8 v0
; nextln: v3 = vconst.i16x8 const0
; nextln: v4 = x86_pblendw v3, v2, 85
; nextln: v5 = raw_bitcast.i32x4 v4
; nextln: v6 = isub v0, v5
; nextln: v7 = fcvt_from_sint.f32x4 v5
; nextln: v8 = ushr_imm v6, 1
; nextln: v9 = fcvt_from_sint.f32x4 v8
; nextln: v10 = fadd v9, v9
; nextln: v1 = fadd v10, v7
return v1
}

View File

@@ -13,3 +13,10 @@ block0:
return v4
}
; run
; Execution test for `fcvt_from_uint.f32x4`: the invocation line after the
; function passes four zero lanes and expects four 0.0 lanes back.
; NOTE(review): only the all-zero input is exercised here; lanes with the top
; bit set, or values that need rounding, are not covered by this case --
; consider adding invocations for them once the expected results are confirmed.
function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0:i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}
; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0]