cranelift: Implement nan canonicalization for vectors (#3146)

This fixes some fuzz bugs that arose when enabling SIMD: NaN
canonicalization is performed on the fuzzers, but Cranelift would panic
on these ops for vectors. This adds some custom codegen with `bitselect`
to ensure that any NaN lanes become canonical-NaN lanes in the
canonicalized operations.
This commit is contained in:
Alex Crichton
2021-08-05 13:44:16 -05:00
committed by GitHub
parent 9e142f8792
commit c6b095f9a3
3 changed files with 95 additions and 12 deletions

View File

@@ -40,6 +40,10 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()>
.strategy(strategy)?
.cranelift_debug_verifier(true);
if wast.ends_with("canonicalize-nan.wast") {
cfg.cranelift_nan_canonicalization(true);
}
// By default we'll allocate huge chunks (6gb) of the address space for each
// linear memory. This is typically fine but when we emulate tests with QEMU
// it turns out that it causes memory usage to balloon massively. Leave a

View File

@@ -0,0 +1,58 @@
;; This *.wast test should be run with `cranelift_nan_canonicalization` set to
;; `true` in `wast.rs`
(module
  ;; Each export applies exactly one float rounding/sqrt vector instruction
  ;; to its v128 argument, so NaN canonicalization can be checked per op.
  (func (export "f32x4.floor") (param v128) (result v128)
    (f32x4.floor (local.get 0)))
  (func (export "f32x4.nearest") (param v128) (result v128)
    (f32x4.nearest (local.get 0)))
  (func (export "f32x4.sqrt") (param v128) (result v128)
    (f32x4.sqrt (local.get 0)))
  (func (export "f32x4.trunc") (param v128) (result v128)
    (f32x4.trunc (local.get 0)))
  (func (export "f32x4.ceil") (param v128) (result v128)
    (f32x4.ceil (local.get 0)))
  (func (export "f64x2.floor") (param v128) (result v128)
    (f64x2.floor (local.get 0)))
  (func (export "f64x2.nearest") (param v128) (result v128)
    (f64x2.nearest (local.get 0)))
  (func (export "f64x2.sqrt") (param v128) (result v128)
    (f64x2.sqrt (local.get 0)))
  (func (export "f64x2.trunc") (param v128) (result v128)
    (f64x2.trunc (local.get 0)))
  (func (export "f64x2.ceil") (param v128) (result v128)
    (f64x2.ceil (local.get 0)))
)
;; Each assertion mixes ordinary lanes with a NaN input lane; once
;; `cranelift_nan_canonicalization` is enabled, the NaN result lane must be
;; the canonical NaN, which a plain `nan` expected value matches exactly.

;; f32x4 rounding/sqrt ops
(assert_return (invoke "f32x4.floor" (v128.const f32x4 1 -2.2 3.4 nan)) (v128.const f32x4 1 -3 3 nan))
(assert_return (invoke "f32x4.nearest" (v128.const f32x4 1 -2.2 3.4 nan)) (v128.const f32x4 1 -2 3 nan))
(assert_return (invoke "f32x4.sqrt" (v128.const f32x4 1 4 -1 nan)) (v128.const f32x4 1 2 nan nan))
(assert_return (invoke "f32x4.trunc" (v128.const f32x4 1 -2.2 3.4 nan)) (v128.const f32x4 1 -2 3 nan))
(assert_return (invoke "f32x4.ceil" (v128.const f32x4 1 -2.2 3.4 nan)) (v128.const f32x4 1 -2 4 nan))

;; f64x2 rounding/sqrt ops
(assert_return (invoke "f64x2.floor" (v128.const f64x2 -2.2 nan)) (v128.const f64x2 -3 nan))
(assert_return (invoke "f64x2.nearest" (v128.const f64x2 -2.2 nan)) (v128.const f64x2 -2 nan))
(assert_return (invoke "f64x2.sqrt" (v128.const f64x2 4 nan)) (v128.const f64x2 2 nan))
(assert_return (invoke "f64x2.trunc" (v128.const f64x2 3.4 nan)) (v128.const f64x2 3 nan))
(assert_return (invoke "f64x2.ceil" (v128.const f64x2 3.4 nan)) (v128.const f64x2 4 nan))