x64: Add non-SSE4.1 lowerings of ceil/trunc/floor/nearest (#6224)

* x64: Add non-SSE4.1 lowerings of ceil/trunc/floor/nearest

This commit adds lowerings that work with SSE2 for CLIF `ceil`, `trunc`,
`floor`, and `nearest` instructions over vectors. To get these working,
`insertlane` for float vectors was also implemented for non-SSE4.1
instructions.

Note that the goal of these lowerings is not speed but rather "it
works", so the decompose-to-call-libcalls logic for vectors is probably
horrendously slow, but should at least be correct.

* Skip new tests on riscv64

* Update cranelift/codegen/src/isa/x64/inst.isle

Co-authored-by: Andrew Brown <andrew.brown@intel.com>

---------

Co-authored-by: Andrew Brown <andrew.brown@intel.com>
This commit is contained in:
Alex Crichton
2023-04-18 12:23:18 -05:00
committed by GitHub
parent 299131ae2d
commit 62f8928bee
6 changed files with 180 additions and 96 deletions

View File

@@ -3,10 +3,11 @@ test run
target x86_64
target x86_64 has_sse41=false
set enable_simd
target x86_64 has_avx
target x86_64 sse42 has_avx
target aarch64
target s390x
target riscv64
;; FIXME: needs support for vectors
;;target riscv64
function %ceil_f32(f32) -> f32 {
block0(v0: f32):
@@ -149,3 +150,19 @@ block0(v0: f64):
; run: %ceil_is_nan_f64(-sNaN:0x1) == 1
; run: %ceil_is_nan_f64(+sNaN:0x4000000000001) == 1
; run: %ceil_is_nan_f64(-sNaN:0x4000000000001) == 1
;; Vector test: applies `ceil` to an f32x4 argument and returns the result.
;; The expectations that follow cover positive and negative fractional lanes;
;; a lane of -0x0.5 is expected to produce -0x0.0.
function %ceil_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = ceil v0
return v1
}
; run: %ceil_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x1.0 0x1.0 0x1.0p1 0x1.8p1]
; run: %ceil_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1]
;; Vector test: applies `ceil` to an f64x2 argument and returns the result.
;; The expectations that follow use one fractional and one integral lane,
;; in both signs.
function %ceil_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = ceil v0
return v1
}
; run: %ceil_f64x2([0x0.5 0x1.0]) == [0x1.0 0x1.0]
; run: %ceil_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]

View File

@@ -3,10 +3,11 @@ test run
target x86_64
target x86_64 has_sse41=false
set enable_simd
target x86_64 has_avx
target x86_64 sse42 has_avx
target aarch64
target s390x
target riscv64
;; FIXME: needs support for vectors
;;target riscv64
function %floor_f32(f32) -> f32 {
block0(v0: f32):
@@ -149,3 +150,19 @@ block0(v0: f64):
; run: %floor_is_nan_f64(-sNaN:0x1) == 1
; run: %floor_is_nan_f64(+sNaN:0x4000000000001) == 1
; run: %floor_is_nan_f64(-sNaN:0x4000000000001) == 1
;; Vector test: applies `floor` to an f32x4 argument and returns the result.
;; The expectations that follow cover positive and negative fractional lanes;
;; a lane of -0x0.5 is expected to produce -0x1.0.
function %floor_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = floor v0
return v1
}
; run: %floor_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1]
; run: %floor_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x1.0 -0x1.0 -0x1.0p1 -0x1.8p1]
;; Vector test: applies `floor` to an f64x2 argument and returns the result.
;; The expectations that follow use one fractional and one integral lane,
;; in both signs.
function %floor_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = floor v0
return v1
}
; run: %floor_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
; run: %floor_f64x2([-0x0.5 -0x1.0]) == [-0x1.0 -0x1.0]

View File

@@ -3,10 +3,11 @@ test run
target x86_64
target x86_64 has_sse41=false
set enable_simd
target x86_64 has_avx
target x86_64 sse42 has_avx
target aarch64
target s390x
target riscv64
;; FIXME: needs support for vectors
;;target riscv64
function %nearest_f32(f32) -> f32 {
block0(v0: f32):
@@ -149,3 +150,19 @@ block0(v0: f64):
; run: %near_is_nan_f64(-sNaN:0x1) == 1
; run: %near_is_nan_f64(+sNaN:0x4000000000001) == 1
; run: %near_is_nan_f64(-sNaN:0x4000000000001) == 1
;; Vector test: applies `nearest` to an f32x4 argument and returns the result.
;; The expectations that follow include lanes that go down (0x1.5 to 0x1.0)
;; and lanes that go up (0x2.9 to 0x1.8p1), in both signs.
function %nearest_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = nearest v0
return v1
}
; run: %nearest_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.8p1]
; run: %nearest_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.8p1]
;; Vector test: applies `nearest` to an f64x2 argument and returns the result.
;; The expectations that follow use one fractional and one integral lane,
;; in both signs.
function %nearest_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = nearest v0
return v1
}
; run: %nearest_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
; run: %nearest_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]

View File

@@ -2,9 +2,12 @@ test interpret
test run
target x86_64
target x86_64 has_sse41=false
set enable_simd
target x86_64 sse42 has_avx
target aarch64
target s390x
target riscv64
;; FIXME: needs support for vectors
;;target riscv64
function %trunc_f32(f32) -> f32 {
block0(v0: f32):
@@ -147,3 +150,19 @@ block0(v0: f64):
; run: %trunc_is_nan_f64(-sNaN:0x1) == 1
; run: %trunc_is_nan_f64(+sNaN:0x4000000000001) == 1
; run: %trunc_is_nan_f64(-sNaN:0x4000000000001) == 1
;; Vector test: applies `trunc` to an f32x4 argument and returns the result.
;; The expectations that follow show the fractional part dropped in both
;; signs; a lane of -0x0.5 is expected to produce -0x0.0.
function %trunc_f32x4(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = trunc v0
return v1
}
; run: %trunc_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1]
; run: %trunc_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1]
;; Vector test: applies `trunc` to an f64x2 argument and returns the result.
;; The expectations that follow use one fractional and one integral lane,
;; in both signs.
function %trunc_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = trunc v0
return v1
}
; run: %trunc_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
; run: %trunc_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]