Convert fma, valltrue & vanytrue to ISLE (AArch64) (#4608)

* Convert `fma`, `valltrue` & `vanytrue` to ISLE (AArch64)

Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `fma`
  - Introduced missing support for `fma` on vector values, as per the
    docs.
- `valltrue`
- `vanytrue`

Also fixed `fcmp` on scalar values in the interpreter, and enabled
interpreter tests in `simd-fma.clif`.

This introduces the `FMLA` machine instruction.

Copyright (c) 2022 Arm Limited

* Add comments for `Fmla` and `Bsl`

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-08-05 17:47:56 +01:00
committed by GitHub
parent 1ed7b43e62
commit eb332b8369
19 changed files with 608 additions and 206 deletions

View File

@@ -910,3 +910,39 @@ block0(v0: f64x2):
; block0:
; frintn v0.2d, v0.2d
; ret
;; fma on f32x4 (element-wise fused multiply-add): v3 = (v0 * v1) + v2.
;; AArch64 `fmla` accumulates into its destination register, so the
;; lowering first saves the multiplicand v0 into v17, then moves the
;; addend v2 into the destination v0 before the `fmla`.
function %f78(f32x4, f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4, v2: f32x4):
v3 = fma v0, v1, v2
return v3
}
; block0:
; mov v17.16b, v0.16b
; mov v0.16b, v2.16b
; fmla v0.4s, v17.4s, v1.4s
; ret
;; fma on f32x2 (64-bit vector): same lowering shape as the f32x4 case,
;; but the `fmla` operates on the two-lane `.2s` arrangement. The moves
;; place the addend (v2) in the destination before the accumulating `fmla`.
function %f79(f32x2, f32x2, f32x2) -> f32x2 {
block0(v0: f32x2, v1: f32x2, v2: f32x2):
v3 = fma v0, v1, v2
return v3
}
; block0:
; mov v17.16b, v0.16b
; mov v0.16b, v2.16b
; fmla v0.2s, v17.2s, v1.2s
; ret
;; fma on f64x2: double-precision variant, lowered to `fmla` on the
;; `.2d` arrangement. As above, v0 is saved to v17 and the addend v2
;; is moved into the destination first, since `fmla` accumulates in place.
function %f80(f64x2, f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2, v2: f64x2):
v3 = fma v0, v1, v2
return v3
}
; block0:
; mov v17.16b, v0.16b
; mov v0.16b, v2.16b
; fmla v0.2d, v17.2d, v1.2d
; ret

View File

@@ -0,0 +1,94 @@
test compile precise-output
set unwind_info=false
target aarch64
;; vall_true on b8x8: `uminv` takes the unsigned minimum across all
;; eight byte lanes, which is nonzero iff every lane is nonzero. The
;; result is moved to a GPR and tested against zero (`cset .. ne`).
function %fn0(b8x8) -> b1 {
block0(v0: b8x8):
v1 = vall_true v0
return v1
}
; block0:
; uminv b3, v0.8b
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b8x16: same `uminv`-based lowering as the b8x8 case,
;; reducing all sixteen byte lanes to their unsigned minimum, then
;; testing that minimum for nonzero.
function %fn1(b8x16) -> b1 {
block0(v0: b8x16):
v1 = vall_true v0
return v1
}
; block0:
; uminv b3, v0.16b
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b16x4: unsigned-minimum reduction over the four
;; halfword lanes (`uminv .. .4h`); the minimum is nonzero only when
;; every lane is nonzero (all true).
function %fn2(b16x4) -> b1 {
block0(v0: b16x4):
v1 = vall_true v0
return v1
}
; block0:
; uminv h3, v0.4h
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b16x8: `uminv` over eight halfword lanes, then a
;; nonzero test on the reduced value, as in the other uminv-based cases.
function %fn3(b16x8) -> b1 {
block0(v0: b16x8):
v1 = vall_true v0
return v1
}
; block0:
; uminv h3, v0.8h
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b32x2: only two 32-bit lanes in a 64-bit vector, so no
;; `uminv` is emitted. Instead the whole vector is moved to a GPR and
;; both halves are checked: `subs` tests the high 32 bits (via LSR 32),
;; and `ccmp` then tests the low 32 bits, so `ne` holds only when both
;; lanes are nonzero.
function %fn4(b32x2) -> b1 {
block0(v0: b32x2):
v1 = vall_true v0
return v1
}
; block0:
; mov x3, v0.d[0]
; subs xzr, xzr, x3, LSR 32
; ccmp w3, #0, #nZcv, ne
; cset x0, ne
; ret
;; vall_true on b32x4: back to the `uminv` lowering (four word lanes,
;; `.4s` arrangement) followed by a nonzero test of the lane minimum.
function %fn5(b32x4) -> b1 {
block0(v0: b32x4):
v1 = vall_true v0
return v1
}
; block0:
; uminv s3, v0.4s
; mov x5, v3.d[0]
; subs xzr, x5, #0
; cset x0, ne
; ret
;; vall_true on b64x2: `uminv` has no 64-bit-lane form, so a different
;; trick is used. `cmeq #0` produces an all-ones lane for each zero
;; input lane; `addp` folds the two lanes together. `fcmp d5, d5` then
;; tests the folded value: it compares equal when d5 is zero (no lane
;; was zero, i.e. all true), and unordered/not-equal otherwise.
;; NOTE(review): this relies on any nonzero cmeq/addp result having a
;; NaN bit pattern under `fcmp` — confirm against the lowering rule.
function %fn6(b64x2) -> b1 {
block0(v0: b64x2):
v1 = vall_true v0
return v1
}
; block0:
; cmeq v3.2d, v0.2d, #0
; addp v5.2d, v3.2d, v3.2d
; fcmp d5, d5
; cset x0, eq
; ret