x64: Add non-SSE4.1 lowerings of ceil/trunc/floor/nearest (#6224)

* x64: Add non-SSE4.1 lowerings of ceil/trunc/floor/nearest

  This commit adds lowerings that work with SSE2 for CLIF `ceil`, `trunc`, `floor`, and `nearest` instructions over vectors. To get these working, `insertlane` for float vectors was also implemented for non-SSE4.1 instructions.

  Note that the goal of these lowerings is not speed but rather "it works", so the decompose-to-call-libcalls logic for vectors is probably horrendously slow but should at least be correct.

* Skip new tests on riscv64

* Update cranelift/codegen/src/isa/x64/inst.isle

  Co-authored-by: Andrew Brown <andrew.brown@intel.com>

---------

Co-authored-by: Andrew Brown <andrew.brown@intel.com>
2023-04-18 12:23:18 -05:00
parent 299131ae2d
commit 62f8928bee
6 changed files with 180 additions and 96 deletions
--- a/cranelift/filetests/filetests/runtests/ceil.clif
+++ b/cranelift/filetests/filetests/runtests/ceil.clif
@@ -3,10 +3,11 @@ test run
 target x86_64
 target x86_64 has_sse41=false
 set enable_simd
-target x86_64 has_avx
+target x86_64 sse42 has_avx
 target aarch64
 target s390x
-target riscv64
+;; FIXME: needs support for vectors
+;;target riscv64

 function %ceil_f32(f32) -> f32 {
 block0(v0: f32):
@@ -149,3 +150,19 @@ block0(v0: f64):
 ; run: %ceil_is_nan_f64(-sNaN:0x1) == 1
 ; run: %ceil_is_nan_f64(+sNaN:0x4000000000001) == 1
 ; run: %ceil_is_nan_f64(-sNaN:0x4000000000001) == 1
+
+function %ceil_f32x4(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = ceil v0
+  return v1
+}
+; run: %ceil_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x1.0 0x1.0 0x1.0p1 0x1.8p1]
+; run: %ceil_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1]
+
+function %ceil_f64x2(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = ceil v0
+  return v1
+}
+; run: %ceil_f64x2([0x0.5 0x1.0]) == [0x1.0 0x1.0]
+; run: %ceil_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]
--- a/cranelift/filetests/filetests/runtests/floor.clif
+++ b/cranelift/filetests/filetests/runtests/floor.clif
@@ -3,10 +3,11 @@ test run
 target x86_64
 target x86_64 has_sse41=false
 set enable_simd
-target x86_64 has_avx
+target x86_64 sse42 has_avx
 target aarch64
 target s390x
-target riscv64
+;; FIXME: needs support for vectors
+;;target riscv64

 function %floor_f32(f32) -> f32 {
 block0(v0: f32):
@@ -149,3 +150,19 @@ block0(v0: f64):
 ; run: %floor_is_nan_f64(-sNaN:0x1) == 1
 ; run: %floor_is_nan_f64(+sNaN:0x4000000000001) == 1
 ; run: %floor_is_nan_f64(-sNaN:0x4000000000001) == 1
+
+function %floor_f32x4(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = floor v0
+  return v1
+}
+; run: %floor_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1]
+; run: %floor_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x1.0 -0x1.0 -0x1.0p1 -0x1.8p1]
+
+function %floor_f64x2(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = floor v0
+  return v1
+}
+; run: %floor_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
+; run: %floor_f64x2([-0x0.5 -0x1.0]) == [-0x1.0 -0x1.0]
--- a/cranelift/filetests/filetests/runtests/nearest.clif
+++ b/cranelift/filetests/filetests/runtests/nearest.clif
@@ -3,10 +3,11 @@ test run
 target x86_64
 target x86_64 has_sse41=false
 set enable_simd
-target x86_64 has_avx
+target x86_64 sse42 has_avx
 target aarch64
 target s390x
-target riscv64
+;; FIXME: needs support for vectors
+;;target riscv64

 function %nearest_f32(f32) -> f32 {
 block0(v0: f32):
@@ -149,3 +150,19 @@ block0(v0: f64):
 ; run: %near_is_nan_f64(-sNaN:0x1) == 1
 ; run: %near_is_nan_f64(+sNaN:0x4000000000001) == 1
 ; run: %near_is_nan_f64(-sNaN:0x4000000000001) == 1
+
+function %nearest_f32x4(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = nearest v0
+  return v1
+}
+; run: %nearest_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.8p1]
+; run: %nearest_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.8p1]
+
+function %nearest_f64x2(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = nearest v0
+  return v1
+}
+; run: %nearest_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
+; run: %nearest_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]
--- a/cranelift/filetests/filetests/runtests/trunc.clif
+++ b/cranelift/filetests/filetests/runtests/trunc.clif
@@ -2,9 +2,12 @@ test interpret
 test run
 target x86_64
 target x86_64 has_sse41=false
+set enable_simd
+target x86_64 sse42 has_avx
 target aarch64
 target s390x
-target riscv64
+;; FIXME: needs support for vectors
+;;target riscv64

 function %trunc_f32(f32) -> f32 {
 block0(v0: f32):
@@ -147,3 +150,19 @@ block0(v0: f64):
 ; run: %trunc_is_nan_f64(-sNaN:0x1) == 1
 ; run: %trunc_is_nan_f64(+sNaN:0x4000000000001) == 1
 ; run: %trunc_is_nan_f64(-sNaN:0x4000000000001) == 1
+
+function %trunc_f32x4(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = trunc v0
+  return v1
+}
+; run: %trunc_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1]
+; run: %trunc_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1]
+
+function %trunc_f64x2(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = trunc v0
+  return v1
+}
+; run: %trunc_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0]
+; run: %trunc_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]