x64: Add non-SSE4.1 lowerings of ceil/trunc/floor/nearest (#6224)
* x64: Add non-SSE4.1 lowerings of ceil/trunc/floor/nearest This commit adds lowerings that work with SSE2 for CLIF `ceil`, `trunc`, `floor`, and `nearest` instructions over vectors. To get these working, `insertlane` for float vectors was also implemented for non-SSE4.1 instructions. Note that the goal of these lowerings is not speed but rather "it works", so the decompose-to-call-libcalls logic for vectors is probably horrendously slow but should at least be correct. * Skip new tests on riscv64 * Update cranelift/codegen/src/isa/x64/inst.isle Co-authored-by: Andrew Brown <andrew.brown@intel.com> --------- Co-authored-by: Andrew Brown <andrew.brown@intel.com>
This commit is contained in:
@@ -3,10 +3,11 @@ test run
|
||||
target x86_64
|
||||
target x86_64 has_sse41=false
|
||||
set enable_simd
|
||||
target x86_64 has_avx
|
||||
target x86_64 sse42 has_avx
|
||||
target aarch64
|
||||
target s390x
|
||||
target riscv64
|
||||
;; FIXME: needs support for vectors
|
||||
;;target riscv64
|
||||
|
||||
function %ceil_f32(f32) -> f32 {
|
||||
block0(v0: f32):
|
||||
@@ -149,3 +150,19 @@ block0(v0: f64):
|
||||
; run: %ceil_is_nan_f64(-sNaN:0x1) == 1
|
||||
; run: %ceil_is_nan_f64(+sNaN:0x4000000000001) == 1
|
||||
; run: %ceil_is_nan_f64(-sNaN:0x4000000000001) == 1
|
||||
|
||||
;; Vector test case: applies `ceil` to a single f32x4 argument and returns the
;; resulting f32x4. The run directives after this function pin the expected lanes
;; for positive and negative inputs (including a -0.0 result).
function %ceil_f32x4(f32x4) -> f32x4 {
|
||||
block0(v0: f32x4):
|
||||
v1 = ceil v0
|
||||
return v1
|
||||
}
|
||||
; run: %ceil_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x1.0 0x1.0 0x1.0p1 0x1.8p1]
|
||||
; run: %ceil_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1]
|
||||
|
||||
;; Vector test case: applies `ceil` to a single f64x2 argument and returns the
;; resulting f64x2. The run directives after this function pin the expected lanes
;; for positive and negative inputs.
function %ceil_f64x2(f64x2) -> f64x2 {
|
||||
block0(v0: f64x2):
|
||||
v1 = ceil v0
|
||||
return v1
|
||||
}
|
||||
; run: %ceil_f64x2([0x0.5 0x1.0]) == [0x1.0 0x1.0]
|
||||
; run: %ceil_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]
|
||||
|
||||
@@ -3,10 +3,11 @@ test run
|
||||
target x86_64
|
||||
target x86_64 has_sse41=false
|
||||
set enable_simd
|
||||
target x86_64 has_avx
|
||||
target x86_64 sse42 has_avx
|
||||
target aarch64
|
||||
target s390x
|
||||
target riscv64
|
||||
;; FIXME: needs support for vectors
|
||||
;;target riscv64
|
||||
|
||||
function %floor_f32(f32) -> f32 {
|
||||
block0(v0: f32):
|
||||
@@ -149,3 +150,19 @@ block0(v0: f64):
|
||||
; run: %floor_is_nan_f64(-sNaN:0x1) == 1
|
||||
; run: %floor_is_nan_f64(+sNaN:0x4000000000001) == 1
|
||||
; run: %floor_is_nan_f64(-sNaN:0x4000000000001) == 1
|
||||
|
||||
;; Vector test case: applies `floor` to a single f32x4 argument and returns the
;; resulting f32x4. The run directives after this function pin the expected lanes
;; for positive and negative inputs.
function %floor_f32x4(f32x4) -> f32x4 {
|
||||
block0(v0: f32x4):
|
||||
v1 = floor v0
|
||||
return v1
|
||||
}
|
||||
; run: %floor_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1]
|
||||
; run: %floor_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x1.0 -0x1.0 -0x1.0p1 -0x1.8p1]
|
||||
|
||||
;; Vector test case: applies `floor` to a single f64x2 argument and returns the
;; resulting f64x2. The run directives after this function pin the expected lanes
;; for positive and negative inputs.
function %floor_f64x2(f64x2) -> f64x2 {
|
||||
block0(v0: f64x2):
|
||||
v1 = floor v0
|
||||
return v1
|
||||
}
|
||||
; run: %floor_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
|
||||
; run: %floor_f64x2([-0x0.5 -0x1.0]) == [-0x1.0 -0x1.0]
|
||||
|
||||
@@ -3,10 +3,11 @@ test run
|
||||
target x86_64
|
||||
target x86_64 has_sse41=false
|
||||
set enable_simd
|
||||
target x86_64 has_avx
|
||||
target x86_64 sse42 has_avx
|
||||
target aarch64
|
||||
target s390x
|
||||
target riscv64
|
||||
;; FIXME: needs support for vectors
|
||||
;;target riscv64
|
||||
|
||||
function %nearest_f32(f32) -> f32 {
|
||||
block0(v0: f32):
|
||||
@@ -149,3 +150,19 @@ block0(v0: f64):
|
||||
; run: %near_is_nan_f64(-sNaN:0x1) == 1
|
||||
; run: %near_is_nan_f64(+sNaN:0x4000000000001) == 1
|
||||
; run: %near_is_nan_f64(-sNaN:0x4000000000001) == 1
|
||||
|
||||
;; Vector test case: applies `nearest` to a single f32x4 argument and returns the
;; resulting f32x4. The run directives after this function pin the expected lanes
;; for positive and negative inputs (including a -0.0 result).
function %nearest_f32x4(f32x4) -> f32x4 {
|
||||
block0(v0: f32x4):
|
||||
v1 = nearest v0
|
||||
return v1
|
||||
}
|
||||
; run: %nearest_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.8p1]
|
||||
; run: %nearest_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.8p1]
|
||||
|
||||
;; Vector test case: applies `nearest` to a single f64x2 argument and returns the
;; resulting f64x2. The run directives after this function pin the expected lanes
;; for positive and negative inputs.
function %nearest_f64x2(f64x2) -> f64x2 {
|
||||
block0(v0: f64x2):
|
||||
v1 = nearest v0
|
||||
return v1
|
||||
}
|
||||
; run: %nearest_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
|
||||
; run: %nearest_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]
|
||||
|
||||
@@ -2,9 +2,12 @@ test interpret
|
||||
test run
|
||||
target x86_64
|
||||
target x86_64 has_sse41=false
|
||||
set enable_simd
|
||||
target x86_64 sse42 has_avx
|
||||
target aarch64
|
||||
target s390x
|
||||
target riscv64
|
||||
;; FIXME: needs support for vectors
|
||||
;;target riscv64
|
||||
|
||||
function %trunc_f32(f32) -> f32 {
|
||||
block0(v0: f32):
|
||||
@@ -147,3 +150,19 @@ block0(v0: f64):
|
||||
; run: %trunc_is_nan_f64(-sNaN:0x1) == 1
|
||||
; run: %trunc_is_nan_f64(+sNaN:0x4000000000001) == 1
|
||||
; run: %trunc_is_nan_f64(-sNaN:0x4000000000001) == 1
|
||||
|
||||
;; Vector test case: applies `trunc` to a single f32x4 argument and returns the
;; resulting f32x4. The run directives after this function pin the expected lanes
;; for positive and negative inputs (including a -0.0 result).
function %trunc_f32x4(f32x4) -> f32x4 {
|
||||
block0(v0: f32x4):
|
||||
v1 = trunc v0
|
||||
return v1
|
||||
}
|
||||
; run: %trunc_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1]
|
||||
; run: %trunc_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1]
|
||||
|
||||
;; Vector test case: applies `trunc` to a single f64x2 argument and returns the
;; resulting f64x2. The run directives after this function pin the expected lanes
;; for positive and negative inputs.
function %trunc_f64x2(f64x2) -> f64x2 {
|
||||
block0(v0: f64x2):
|
||||
v1 = trunc v0
|
||||
return v1
|
||||
}
|
||||
; run: %trunc_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
|
||||
; run: %trunc_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]
|
||||
|
||||
Reference in New Issue
Block a user