cranelift: Add MinGW fma regression tests (#4517)
* cranelift: Add MinGW `fma` regression tests * cranelift: Fix FMA in interpreter * cranelift: Add separate `fma` test suite for the interpreter The interpreter can run `fma.clif` on most platforms, however on `x86_64-pc-windows-gnu` we use libm which has issues with some inputs. We should delete `fma-interpreter.clif` and enable the interpreter on the main `fma.clif` file once those are fixed.
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -595,6 +595,7 @@ dependencies = [
|
||||
"cranelift-entity",
|
||||
"cranelift-frontend",
|
||||
"cranelift-reader",
|
||||
"libm",
|
||||
"log",
|
||||
"smallvec",
|
||||
"thiserror",
|
||||
|
||||
@@ -770,12 +770,6 @@ impl Ieee32 {
|
||||
f32::from_bits(self.0)
|
||||
}
|
||||
|
||||
/// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a
|
||||
/// more accurate result than an unfused multiply-add.
|
||||
pub fn mul_add(&self, a: Self, b: Self) -> Self {
|
||||
Self::with_float(self.as_f32().mul_add(a.as_f32(), b.as_f32()))
|
||||
}
|
||||
|
||||
/// Returns the square root of self.
|
||||
pub fn sqrt(self) -> Self {
|
||||
Self::with_float(self.as_f32().sqrt())
|
||||
@@ -962,12 +956,6 @@ impl Ieee64 {
|
||||
f64::from_bits(self.0)
|
||||
}
|
||||
|
||||
/// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a
|
||||
/// more accurate result than an unfused multiply-add.
|
||||
pub fn mul_add(&self, a: Self, b: Self) -> Self {
|
||||
Self::with_float(self.as_f64().mul_add(a.as_f64(), b.as_f64()))
|
||||
}
|
||||
|
||||
/// Returns the square root of self.
|
||||
pub fn sqrt(self) -> Self {
|
||||
Self::with_float(self.as_f64().sqrt())
|
||||
|
||||
25
cranelift/filetests/filetests/runtests/fma-interpreter.clif
Normal file
25
cranelift/filetests/filetests/runtests/fma-interpreter.clif
Normal file
@@ -0,0 +1,25 @@
|
||||
test interpret
|
||||
|
||||
; The interpreter can run `fma.clif` on most platforms, however on `x86_64-pc-windows-gnu` we
|
||||
; use libm which has issues with some inputs. We should delete this file and enable the interpreter
|
||||
; on the main `fma.clif` file once those are fixed.
|
||||
|
||||
; See: https://github.com/bytecodealliance/wasmtime/pull/4517
|
||||
; See: https://github.com/rust-lang/libm/issues/263
|
||||
|
||||
function %fma_f32(f32, f32, f32) -> f32 {
|
||||
block0(v0: f32, v1: f32, v2: f32):
|
||||
v3 = fma v0, v1, v2
|
||||
return v3
|
||||
}
|
||||
; run: %fma_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
|
||||
; run: %fma_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14
|
||||
|
||||
|
||||
function %fma_f64(f64, f64, f64) -> f64 {
|
||||
block0(v0: f64, v1: f64, v2: f64):
|
||||
v3 = fma v0, v1, v2
|
||||
return v3
|
||||
}
|
||||
; run: %fma_f64(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
|
||||
; run: %fma_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21
|
||||
@@ -1,4 +1,3 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
target s390x
|
||||
@@ -38,6 +37,15 @@ block0(v0: f32, v1: f32, v2: f32):
|
||||
; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.0) == 0x0.0
|
||||
; run: %fma_f32(0x0.0, 0x0.0, 0x0.000002p-126) == 0x0.000002p-126
|
||||
|
||||
; Regression tests for x86_64-pc-windows-gnu
|
||||
; See: https://github.com/bytecodealliance/wasmtime/issues/4512
|
||||
; run: %fma_f32(0x1.0p100, 0x1.0p100, -Inf) == -Inf
|
||||
; run: %fma_f32(0x1.fffffep23, 0x1.000004p28, 0x1.fcp5) == 0x1.000002p52
|
||||
; run: %fma_f32(0x1.84ae3p125, 0x1.6p-141, 0x1.0p-149) == 0x1.0b37c2p-15
|
||||
; run: %fma_f32(0x1.00001p50, 0x1.1p50, 0x1.0p-149) == 0x1.100012p100
|
||||
; run: %fma_f32(0x1.000002p50, 0x1.8p50, -0x1.0p-149) == 0x1.800002p100
|
||||
; run: %fma_f32(0x1.83bd78p4, -0x1.cp118, -0x1.344108p-2) == -0x1.5345cap123
|
||||
|
||||
|
||||
|
||||
;; The IEEE754 Standard does not make a lot of guarantees about what
|
||||
@@ -98,6 +106,14 @@ block0(v0: f64, v1: f64, v2: f64):
|
||||
; run: %fma_f64(0x0.0, 0x0.0, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022
|
||||
|
||||
|
||||
; Regression tests for x86_64-pc-windows-gnu
|
||||
; See: https://github.com/bytecodealliance/wasmtime/issues/4512
|
||||
; run: %fma_f64(0x1.0p1000, 0x1.0p1000, -Inf) == -Inf
|
||||
; run: %fma_f64(-0x1.4f8ac19291ffap1023, 0x1.39c33c8d39b7p-1025, 0x1.ee11f685e2e12p-1) == 0x1.2071b0283f156p-1
|
||||
; run: %fma_f64(0x1.0000000000008p500, 0x1.1p500, 0x1.0p-1074) == 0x1.1000000000009p1000
|
||||
; run: %fma_f64(0x1.0000000000001p500, 0x1.8p500, -0x1.0p-1074) == 0x1.8000000000001p1000
|
||||
; run: %fma_f64(0x0.ffffffep513, 0x1.0000002p511, -0x1.0p-1074) == 0x1.fffffffffffffp1023
|
||||
|
||||
;; The IEEE754 Standard does not make a lot of guarantees about what
|
||||
;; comes out of NaN producing operations, we just check if its a NaN
|
||||
function %fma_is_nan_f64(f64, f64, f64) -> i32 {
|
||||
|
||||
@@ -17,6 +17,9 @@ log = { version = "0.4.8", default-features = false }
|
||||
smallvec = "1.6.1"
|
||||
thiserror = "1.0.15"
|
||||
|
||||
[target.x86_64-pc-windows-gnu.dependencies]
|
||||
libm = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
cranelift-frontend = { path = "../frontend", version = "0.87.0" }
|
||||
cranelift-reader = { path = "../reader", version = "0.87.0" }
|
||||
|
||||
@@ -555,10 +555,32 @@ impl Value for DataValue {
|
||||
fn fma(self, b: Self, c: Self) -> ValueResult<Self> {
|
||||
match (self, b, c) {
|
||||
(DataValue::F32(a), DataValue::F32(b), DataValue::F32(c)) => {
|
||||
Ok(DataValue::F32(a.mul_add(b, c)))
|
||||
// The `fma` function for `x86_64-pc-windows-gnu` is incorrect. Use `libm`'s instead.
|
||||
// See: https://github.com/bytecodealliance/wasmtime/issues/4512
|
||||
#[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))]
|
||||
let res = libm::fmaf(a.as_f32(), b.as_f32(), c.as_f32());
|
||||
|
||||
#[cfg(not(all(
|
||||
target_arch = "x86_64",
|
||||
target_os = "windows",
|
||||
target_env = "gnu"
|
||||
)))]
|
||||
let res = a.as_f32().mul_add(b.as_f32(), c.as_f32());
|
||||
|
||||
Ok(DataValue::F32(res.into()))
|
||||
}
|
||||
(DataValue::F64(a), DataValue::F64(b), DataValue::F64(c)) => {
|
||||
Ok(DataValue::F64(a.mul_add(b, c)))
|
||||
#[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))]
|
||||
let res = libm::fma(a.as_f64(), b.as_f64(), c.as_f64());
|
||||
|
||||
#[cfg(not(all(
|
||||
target_arch = "x86_64",
|
||||
target_os = "windows",
|
||||
target_env = "gnu"
|
||||
)))]
|
||||
let res = a.as_f64().mul_add(b.as_f64(), c.as_f64());
|
||||
|
||||
Ok(DataValue::F64(res.into()))
|
||||
}
|
||||
(a, _b, _c) => Err(ValueError::InvalidType(ValueTypeClass::Float, a.ty())),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user