Convert fma, valltrue & vanytrue to ISLE (AArch64) (#4608)

* Convert `fma`, `valltrue` & `vanytrue` to ISLE (AArch64)

Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `fma`
  - Introduced missing support for `fma` on vector values, as per the
    docs.
- `valltrue`
- `vanytrue`

Also fixed `fcmp` on scalar values in the interpreter, and enabled
interpreter tests in `simd-fma.clif`.

This introduces the `FMLA` machine instruction.

Copyright (c) 2022 Arm Limited

* Add comments for `Fmla` and `Bsl`

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-08-05 17:47:56 +01:00
committed by GitHub
parent 1ed7b43e62
commit eb332b8369
19 changed files with 608 additions and 206 deletions

View File

@@ -910,3 +910,39 @@ block0(v0: f64x2):
; block0:
; frintn v0.2d, v0.2d
; ret
;; fma on f32x4 (element-wise fused multiply-add): v3 = (v0 * v1) + v2.
;; AArch64 `fmla` accumulates into its destination register, so the
;; lowering first saves the multiplicand v0 into v17, then moves the
;; addend v2 into the destination v0 before the `fmla`.
function %f78(f32x4, f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4, v2: f32x4):
v3 = fma v0, v1, v2
return v3
}
; block0:
; mov v17.16b, v0.16b
; mov v0.16b, v2.16b
; fmla v0.4s, v17.4s, v1.4s
; ret
;; fma on f32x2 (64-bit vector): same lowering shape as the f32x4 case,
;; but the `fmla` operates on the two-lane `.2s` arrangement. The moves
;; place the addend (v2) in the destination before the accumulating `fmla`.
function %f79(f32x2, f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2, v2: f32x2):
v3 = fma v0, v1, v2
return v3
}
; block0:
; mov v17.16b, v0.16b
; mov v0.16b, v2.16b
; fmla v0.2s, v17.2s, v1.2s
; ret
;; fma on f64x2: double-precision variant, lowered to `fmla` on the
;; `.2d` arrangement. As above, v0 is saved to v17 and the addend v2
;; is moved into the destination first, since `fmla` accumulates in place.
function %f80(f64x2, f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2, v2: f64x2):
v3 = fma v0, v1, v2
return v3
}
; block0:
; mov v17.16b, v0.16b
; mov v0.16b, v2.16b
; fmla v0.2d, v17.2d, v1.2d
; ret

View File

@@ -0,0 +1,94 @@
test compile precise-output
set unwind_info=false
target aarch64
;; vall_true on b8x8: `uminv` takes the unsigned minimum across all
;; eight byte lanes, which is nonzero iff every lane is nonzero. The
;; result is moved to a GPR and tested against zero (`cset .. ne`).
function %fn0(b8x8) -> b1 {
block0(v0: b8x8):
v1 = vall_true v0
return v1
}
; block0:
; uminv b3, v0.8b
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b8x16: same `uminv`-based lowering as the b8x8 case,
;; reducing all sixteen byte lanes to their unsigned minimum, then
;; testing that minimum for nonzero.
function %fn1(b8x16) -> b1 {
block0(v0: b8x16):
v1 = vall_true v0
return v1
}
; block0:
; uminv b3, v0.16b
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b16x4: unsigned-minimum reduction over the four
;; halfword lanes (`uminv .. .4h`); the minimum is nonzero only when
;; every lane is nonzero (all true).
function %fn2(b16x4) -> b1 {
block0(v0: b16x4):
v1 = vall_true v0
return v1
}
; block0:
; uminv h3, v0.4h
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b16x8: `uminv` over eight halfword lanes, then a
;; nonzero test on the reduced value, as in the other uminv-based cases.
function %fn3(b16x8) -> b1 {
block0(v0: b16x8):
v1 = vall_true v0
return v1
}
; block0:
; uminv h3, v0.8h
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b32x2: only two 32-bit lanes in a 64-bit vector, so no
;; `uminv` is emitted. Instead the whole vector is moved to a GPR and
;; both halves are checked: `subs` tests the high 32 bits (via LSR 32),
;; and `ccmp` then tests the low 32 bits, so `ne` holds only when both
;; lanes are nonzero.
function %fn4(b32x2) -> b1 {
block0(v0: b32x2):
v1 = vall_true v0
return v1
}
; block0:
; mov x3, v0.d[0]
; subs xzr, xzr, x3, LSR 32
; ccmp w3, #0, #nZcv, ne
; cset x0, ne
; ret
;; vall_true on b32x4: back to the `uminv` lowering (four word lanes,
;; `.4s` arrangement) followed by a nonzero test of the lane minimum.
function %fn5(b32x4) -> b1 {
block0(v0: b32x4):
v1 = vall_true v0
return v1
}
; block0:
; uminv s3, v0.4s
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b64x2: `uminv` has no 64-bit-lane form, so a different
;; trick is used. `cmeq #0` produces an all-ones lane for each zero
;; input lane; `addp` folds the two lanes together. `fcmp d5, d5` then
;; tests the folded value: it compares equal when d5 is zero (no lane
;; was zero, i.e. all true), and unordered/not-equal otherwise.
;; NOTE(review): this relies on any nonzero cmeq/addp result having a
;; NaN bit pattern under `fcmp` — confirm against the lowering rule.
function %fn6(b64x2) -> b1 {
block0(v0: b64x2):
v1 = vall_true v0
return v1
}
; block0:
; cmeq v3.2d, v0.2d, #0
; addp v5.2d, v3.2d, v3.2d
; fcmp d5, d5
; cset x0, eq
; ret