Files
wasmtime/cranelift/filetests/filetests/runtests/simd-fma.clif
Alex Crichton 967543eb43 aarch64: Add more lowerings for the CLIF fma (#6150)
This commit adds new lowerings to the AArch64 backend of the
element-based `fmla` and `fmls` instructions. These instructions have
one of the multiplicands as an implicit broadcast of a single lane of
another register and can help remove `shuffle` or `dup` instructions
that would otherwise be used to implement them.
2023-04-05 17:22:55 +00:00

126 lines
6.0 KiB
Plaintext

;; Test modes applied to every function in this file. The `; run:` comments
;; that follow each function list the invocations and their expected results.
test interpret
test run
set enable_simd
;; x86_64 is listed twice: once with AVX and FMA enabled and once with both
;; explicitly disabled, so the same expectations are checked in both settings.
target x86_64 has_avx has_fma
target x86_64 has_avx=false has_fma=false
target aarch64
;; Lane-wise fused multiply-add over four f32 lanes: each result lane is
;; v0 * v1 + v2 for the corresponding input lanes.
function %fma_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4, v2: f32x4):
    v10 = fma v0, v1, v2
    return v10
}
; run: %fma_f32x4([0x9.0 0x83.0 0x1.99999ap-2 -0x1.4cccccp0], [0x9.0 0x2.68091p6 0x1.333334p-1 -0x1.666666p1], [0x9.0 0x9.88721p1 0x1.400000p1 -0x1.b33334p0]) == [0x1.680000p6 0x1.3b88e6p14 0x1.5eb852p1 0x1.f0a3d2p0]
; Zeroes
; run: %fma_f32x4([0x0.0 0x0.0 0x0.0 -0x0.0], [0x0.0 0x0.0 -0x0.0 0x0.0], [0x0.0 -0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
; Infinities
; run: %fma_f32x4([-Inf Inf -Inf Inf], [-Inf -Inf Inf -Inf], [0x0.0 0x0.0 0x0.0 -Inf]) == [Inf -Inf -Inf -Inf]
; run: %fma_f32x4([-Inf 0x0.0 0x0.0 0x0.0], [Inf 0x0.0 0x0.0 0x0.0], [-Inf 0x0.0 0x0.0 0x0.0]) == [-Inf 0x0.0 0x0.0 0x0.0]
; F32 Epsilon / Max / Min Positive
; run: %fma_f32x4([0x1.000000p-23 0x0.0 0x1.fffffep127 0x0.0], [0x1.000000p-23 0x0.0 0x1.fffffep127 0x0.0], [0x1.000000p-23 0x1.000000p-23 0x1.fffffep127 0x1.fffffep127]) == [0x1.000002p-23 0x1.000000p-23 +Inf 0x1.fffffep127]
; run: %fma_f32x4([0x1.000000p-126 0x0.0 0x0.0 0x0.0], [0x1.000000p-126 0x0.0 0x0.0 0x0.0], [0x1.000000p-126 0x1.000000p-126 0x0.0 0x0.0]) == [0x1.000000p-126 0x1.000000p-126 0x0.0 0x0.0]
; F32 Subnormals
; run: %fma_f32x4([0x0.800000p-126 0x0.800000p-126 0x0.0 0x0.000002p-126], [0x0.800000p-126 0x0.800000p-126 0x0.0 0x0.000002p-126], [0x0.800000p-126 0x0.0 0x0.000002p-126 0x0.000002p-126]) == [0x0.800000p-126 0x0.0 0x0.000002p-126 0x0.000002p-126]
; run: %fma_f32x4([0x0.000002p-126 0x0.0 0x0.0 0x0.0], [0x0.000002p-126 0x0.0 0x0.0 0x0.0], [0x0.0 0x0.000002p-126 0x0.0 0x0.0]) == [0x0.0 0x0.000002p-126 0x0.0 0x0.0]
;; The IEEE 754 standard makes few guarantees about the exact NaN value that
;; a NaN-producing operation returns, so we only check that every lane of the
;; result is a NaN.
function %fma_is_nan_f32x4(f32x4, f32x4, f32x4) -> i8 {
block0(v0: f32x4, v1: f32x4, v2: f32x4):
    ;; Operation under test.
    v10 = fma v0, v1, v2
    ;; Comparing the result with itself: a lane is "not equal" to itself
    ;; only when it holds a NaN.
    v11 = fcmp ne v10, v10
    ;; Reduce the per-lane mask to a single flag that is 1 only when every
    ;; lane compared as NaN.
    v12 = vall_true v11
    return v12
}
; run: %fma_is_nan_f32x4([Inf -Inf -Inf +NaN], [-Inf Inf -Inf 0x0.0], [Inf Inf -Inf 0x0.0]) == 1
; run: %fma_is_nan_f32x4([0x0.0 0x0.0 -NaN 0x0.0], [+NaN 0x0.0 0x0.0 -NaN], [0x0.0 +NaN 0x0.0 0x0.0]) == 1
; run: %fma_is_nan_f32x4([0x0.0 NaN NaN NaN], [0x0.0 NaN NaN NaN], [-NaN NaN NaN NaN]) == 1
;; Lane-wise fused multiply-add over two f64 lanes: each result lane is
;; v0 * v1 + v2 for the corresponding input lanes.
function %fma_f64x2(f64x2, f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2, v2: f64x2):
    v10 = fma v0, v1, v2
    return v10
}
; run: %fma_f64x2([0x9.0 0x1.3b88ea148dd4ap14], [0x9.0 0x2.680916809121p6], [0x9.0 0x9.887218721837p1]) == [0x1.680000p6 0x1.7ba6ebee17417p21]
; Zeroes
; run: %fma_f64x2([0x0.0 0x0.0], [0x0.0 0x0.0], [0x0.0 -0x0.0]) == [0x0.0 0x0.0]
; run: %fma_f64x2([0x0.0 -0x0.0], [-0x0.0 0x0.0], [0x0.0 0x0.0]) == [0x0.0 0x0.0]
; Infinities
; run: %fma_f64x2([-Inf Inf], [-Inf -Inf], [0x0.0 0x0.0]) == [+Inf -Inf]
; run: %fma_f64x2([-Inf Inf], [Inf -Inf], [0x0.0 -Inf]) == [-Inf -Inf]
; run: %fma_f64x2([-Inf Inf], [Inf Inf], [-Inf Inf]) == [-Inf Inf]
; F64 Epsilon / Max / Min Positive
; run: %fma_f64x2([0x1.0p-52 0x0.0], [0x1.0p-52 0x0.0], [0x1.0p-52 0x1.0p-52]) == [0x1.0000000000001p-52 0x1.0p-52]
; run: %fma_f64x2([0x1.fffffffffffffp1023 0x0.0], [0x1.fffffffffffffp1023 0x0.0], [0x1.fffffffffffffp1023 0x1.fffffffffffffp1023]) == [+Inf 0x1.fffffffffffffp1023]
; run: %fma_f64x2([0x1.0p-1022 0x0.0], [0x1.0p-1022 0x0.0], [0x1.0p-1022 0x1.0p-1022]) == [0x1.0p-1022 0x1.0p-1022]
; F64 Subnormals
; run: %fma_f64x2([0x0.8p-1022 0x0.8p-1022], [0x0.8p-1022 0x0.8p-1022], [0x0.8p-1022 0x0.0]) == [0x0.8p-1022 0x0.0]
; run: %fma_f64x2([0x0.0 0x0.0000000000001p-1022], [0x0.0 0x0.0000000000001p-1022], [0x0.8p-1022 0x0.0000000000001p-1022]) == [0x0.8p-1022 0x0.0000000000001p-1022]
; run: %fma_f64x2([0x0.0000000000001p-1022 0x0.0], [0x0.0000000000001p-1022 0x0.0], [0x0.0 0x0.0000000000001p-1022]) == [0x0.0 0x0.0000000000001p-1022]
;; The IEEE 754 standard makes few guarantees about the exact NaN value that
;; a NaN-producing operation returns, so we only check that every lane of the
;; result is a NaN.
function %fma_is_nan_f64x2(f64x2, f64x2, f64x2) -> i8 {
block0(v0: f64x2, v1: f64x2, v2: f64x2):
    ;; Operation under test.
    v10 = fma v0, v1, v2
    ;; Comparing the result with itself: a lane is "not equal" to itself
    ;; only when it holds a NaN.
    v11 = fcmp ne v10, v10
    ;; Reduce the per-lane mask to a single flag that is 1 only when both
    ;; lanes compared as NaN.
    v12 = vall_true v11
    return v12
}
; run: %fma_is_nan_f64x2([Inf -Inf], [-Inf Inf], [Inf Inf]) == 1
; run: %fma_is_nan_f64x2([-Inf +NaN], [-Inf 0x0.0], [-Inf 0x0.0]) == 1
; run: %fma_is_nan_f64x2([0x0.0 0x0.0], [+NaN 0x0.0], [0x0.0 +NaN]) == 1
; run: %fma_is_nan_f64x2([-NaN 0x0.0], [0x0.0 -NaN], [0x0.0 0x0.0]) == 1
; run: %fma_is_nan_f64x2([0x0.0 NaN], [0x0.0 NaN], [-NaN NaN]) == 1
;; Fused multiply-add whose second multiplicand is the scalar v1 broadcast to
;; all four f32 lanes.
function %fma_f32x4_splat1(f32x4, f32, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32, v2: f32x4):
    ;; Broadcast the scalar so it can be used as a vector operand.
    v10 = splat.f32x4 v1
    v11 = fma v0, v10, v2
    return v11
}
; run: %fma_f32x4_splat1([0x9.0 0x9.0 0x9.0 0x9.0], 0x9.0, [0x9.0 0x9.0 0x9.0 0x9.0]) == [0x1.680000p6 0x1.680000p6 0x1.680000p6 0x1.680000p6]
; run: %fma_f32x4_splat1([0x1.0 0x2.0 0x3.0 0x4.0], 0x0.0, [0x5.0 0x6.0 0x7.0 0x8.0]) == [0x5.0 0x6.0 0x7.0 0x8.0]
;; Fused multiply-add whose first multiplicand is the scalar v0 broadcast to
;; all four f32 lanes.
function %fma_f32x4_splat2(f32, f32x4, f32x4) -> f32x4 {
block0(v0: f32, v1: f32x4, v2: f32x4):
    ;; Broadcast the scalar so it can be used as a vector operand.
    v10 = splat.f32x4 v0
    v11 = fma v10, v1, v2
    return v11
}
; run: %fma_f32x4_splat2(0x9.0, [0x9.0 0x9.0 0x9.0 0x9.0], [0x9.0 0x9.0 0x9.0 0x9.0]) == [0x1.680000p6 0x1.680000p6 0x1.680000p6 0x1.680000p6]
; run: %fma_f32x4_splat2(0x0.0, [0x1.0 0x2.0 0x3.0 0x4.0], [0x5.0 0x6.0 0x7.0 0x8.0]) == [0x5.0 0x6.0 0x7.0 0x8.0]
;; Fused multiply-add whose second multiplicand is the scalar v1 broadcast to
;; both f64 lanes.
function %fma_f64x2_splat1(f64x2, f64, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64, v2: f64x2):
    ;; Broadcast the scalar so it can be used as a vector operand.
    v10 = splat.f64x2 v1
    v11 = fma v0, v10, v2
    return v11
}
; run: %fma_f64x2_splat1([0x9.0 0x9.0], 0x9.0, [0x9.0 0x9.0]) == [0x1.680000p6 0x1.680000p6]
; run: %fma_f64x2_splat1([0x1.0 0x2.0], 0x0.0, [0x5.0 0x6.0]) == [0x5.0 0x6.0]
;; Fused multiply-add whose first multiplicand is the scalar v0 broadcast to
;; both f64 lanes.
function %fma_f64x2_splat2(f64, f64x2, f64x2) -> f64x2 {
block0(v0: f64, v1: f64x2, v2: f64x2):
    ;; Broadcast the scalar so it can be used as a vector operand.
    v10 = splat.f64x2 v0
    v11 = fma v10, v1, v2
    return v11
}
; run: %fma_f64x2_splat2(0x9.0, [0x9.0 0x9.0], [0x9.0 0x9.0]) == [0x1.680000p6 0x1.680000p6]
; run: %fma_f64x2_splat2(0x0.0, [0x1.0 0x2.0], [0x5.0 0x6.0]) == [0x5.0 0x6.0]