cranelift: Fix fdemote on the interpreter (#5158)

* cranelift: Cleanup `fdemote`/`fpromote` tests
* cranelift: Fix `fdemote`/`fpromote` instruction docs
  The verifier fails if the input and output types are the same for these instructions
* cranelift: Fix `fdemote`/`fpromote` in the interpreter
* fuzzgen: Add `fdemote`/`fpromote`
2022-11-15 22:22:00 +00:00
parent a007e02bd2
commit a793648eb2
8 changed files with 249 additions and 100 deletions
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3482,8 +3482,7 @@ pub(crate) fn define(
        - `f32` and `f64`. This may change in the future.
        The result type must have the same number of vector lanes as the input,
-        and the result lanes must not have fewer bits than the input lanes. If
+        and the result lanes must not have fewer bits than the input lanes.
-        the input and output types are the same, this is a no-op.
        "#,
            &formats.unary,
        )
@@ -3504,8 +3503,7 @@ pub(crate) fn define(
        - `f32` and `f64`. This may change in the future.
        The result type must have the same number of vector lanes as the input,
-        and the result lanes must not have more bits than the input lanes. If
+        and the result lanes must not have more bits than the input lanes.
-        the input and output types are the same, this is a no-op.
        "#,
            &formats.unary,
        )
--- a/cranelift/filetests/filetests/runtests/conversions-load-store.clif
+++ b/cranelift/filetests/filetests/runtests/conversions-load-store.clif
@@ -1,87 +0,0 @@
 test run
 target x86_64
 target s390x
 target aarch64
 ;; target riscv64 vector type not supported.
 ;; Round-trips the f32 argument through heap0 (store.f32, then load.f32 from the
 ;; address that heap_addr derives from the index argument v1) and widens the
 ;; loaded value with fpromote.f64, so the promotion consumes a value that comes
 ;; straight from a load.
 ;; v0 is the vmctx pointer; gv1, loaded from vmctx+0, is the base of heap0.
 function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64
 block0(v0: i64, v1: i64, v2: f32):
    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.f32 v2, v3
    v4 = load.f32 v3
    v5 = fpromote.f64 v4
    return v5
 }
 ; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8
 ; run: %fpromote_f32_f64(0, 0x0.0) == 0x0.0
 ; run: %fpromote_f32_f64(1, 0x0.1) == 0x0.1
 ; run: %fpromote_f32_f64(2, 0x0.2) == 0x0.2
 ; run: %fpromote_f32_f64(3, 0x3.2) == 0x3.2
 ; run: %fpromote_f32_f64(0xc, 0x3.2) == 0x3.2
 ;; Round-trips the f64 argument through heap0 (store.f64, then load.f64 from the
 ;; address that heap_addr derives from the index argument v1) and narrows the
 ;; loaded value with fdemote.f32, so the demotion consumes a value that comes
 ;; straight from a load.
 ;; v0 is the vmctx pointer; gv1, loaded from vmctx+0, is the base of heap0.
 function %fdemote_test(i64 vmctx, i64, f64) -> f32 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64
 block0(v0: i64, v1: i64, v2: f64):
    v3 = heap_addr.i64 heap0, v1, 0, 8
    store.f64 v2, v3
    v4 = load.f64 v3
    v5 = fdemote.f32 v4
    return v5
 }
 ; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8
 ; run: %fdemote_test(0, 0x0.0) == 0x0.0
 ; run: %fdemote_test(1, 0x0.1) == 0x0.1
 ; run: %fdemote_test(2, 0x0.2) == 0x0.2
 ; run: %fdemote_test(3, 0x3.2) == 0x3.2
 ; run: %fdemote_test(0x8, 0x3.2) == 0x3.2
 ;; Stores the f64x2 argument into heap0 at the address heap_addr derives from the
 ;; index argument v1, reloads it, and applies fvdemote to the loaded vector,
 ;; returning the f32x4 result.
 ;; v0 is the vmctx pointer; gv1, loaded from vmctx+0, is the base of heap0.
 function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64
 block0(v0: i64, v1: i64, v2: f64x2):
    v3 = heap_addr.i64 heap0, v1, 0, 16
    store.f64x2 v2, v3
    v4 = load.f64x2 v3
    v5 = fvdemote v4
    return v5
 }
 ; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8
 ; run: %fvdemote_test(0, [0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
 ; run: %fvdemote_test(1, [0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0]
 ; run: %fvdemote_test(2, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
 ; run: %fvdemote_test(8, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
 ; run: %fvdemote_test(16, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
 ;; Stores the f32x4 argument into heap0 at the address heap_addr derives from the
 ;; index argument v1, reloads it, and applies fvpromote_low to the loaded vector,
 ;; returning the f64x2 result.
 ;; v0 is the vmctx pointer; gv1, loaded from vmctx+0, is the base of heap0.
 function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64
 block0(v0: i64, v1: i64, v2: f32x4):
    v3 = heap_addr.i64 heap0, v1, 0, 16
    store.f32x4 v2, v3
    v4 = load.f32x4 v3
    v5 = fvpromote_low v4
    return v5
 }
 ; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8
 ; run: %fvpromote_low_test(0, [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0]
 ; run: %fvpromote_low_test(1, [0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2]
 ; run: %fvpromote_low_test(2, [0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2]
 ; run: %fvpromote_low_test(5, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
 ; run: %fvpromote_low_test(16, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
--- a/cranelift/filetests/filetests/runtests/fdemote.clif
+++ b/cranelift/filetests/filetests/runtests/fdemote.clif
@@ -0,0 +1,91 @@
 test interpret
 test run
 target x86_64
 target s390x
 target aarch64
 target riscv64
 ;; Minimal fdemote wrapper: narrows the f64 argument to f32 with fdemote.f32 and
 ;; returns the result unchanged, so each run directive checks the conversion of a
 ;; single input value.
 function %fdemote(f64) -> f32 {
 block0(v0: f64):
    v1 = fdemote.f32 v0
    return v1
 }
 ; run: %fdemote(0x0.0) == 0x0.0
 ; run: %fdemote(-0x0.0) == -0x0.0
 ; run: %fdemote(0x0.1) == 0x0.1
 ; run: %fdemote(0x0.2) == 0x0.2
 ; run: %fdemote(0x0.5) == 0x0.5
 ; run: %fdemote(-0x0.5) == -0x0.5
 ; run: %fdemote(0x3.2) == 0x3.2
 ; run: %fdemote(0x9.0) == 0x9.0
 ; run: %fdemote(-0x9.0) == -0x9.0
 ; run: %fdemote(0x1.1p10) == 0x1.100000p10
 ; run: %fdemote(-0x1.1p10) == -0x1.100000p10
 ; run: %fdemote(0x1.c555555555556p10) == 0x1.c55556p10
 ; run: %fdemote(-0x1.999999999999ap-2) == -0x1.99999ap-2
 ; run: %fdemote(0x1.c3c3c3c3c3c3cp-1) == 0x1.c3c3c4p-1
 ; run: %fdemote(0x1.c924924924925p-1) == 0x1.c92492p-1
 ; run: %fdemote(0x1.4cccccccccccdp0) == 0x1.4cccccp0
 ;; Inf
 ; run: %fdemote(Inf) == Inf
 ; run: %fdemote(-Inf) == -Inf
 ;; Epsilon / Max / Min Positive
 ; run: %fdemote(0x1.0000000000000p-52) == 0x1.0000000000000p-52
 ; run: %fdemote(-0x1.0000000000000p-52) == -0x1.0000000000000p-52
 ; run: %fdemote(0x1.fffffffffffffp1023) == +Inf
 ; run: %fdemote(-0x1.fffffffffffffp1023) == -Inf
 ; run: %fdemote(0x1.0000000000000p-1022) == 0x0.0
 ; run: %fdemote(-0x1.0000000000000p-1022) == -0x0.0
 ;; Subnormals
 ; run: %fdemote(0x0.8000000000000p-1022) == 0x0.0
 ; run: %fdemote(-0x0.8000000000000p-1022) == -0x0.0
 ; run: %fdemote(0x0.0000000000001p-1022) == 0x0.0
 ; run: %fdemote(-0x0.0000000000001p-1022) == -0x0.0
 ;; NaN's
 ; For NaN's this operation is specified as producing a value that is a NaN
 ;; Narrows the f64 argument to f32 and compares the result against itself with
 ;; `fcmp ne`. A float is unequal to itself only when it is a NaN, so the i8 result
 ;; is 1 exactly when fdemote produced a NaN. Only NaN-ness is observed; the sign
 ;; and payload bits of the result are not checked.
 function %fdemote_is_nan(f64) -> i8 {
 block0(v0: f64):
    v1 = fdemote.f32 v0
    v2 = fcmp ne v1, v1
    return v2
 }
 ; run: %fdemote_is_nan(+NaN) == 1
 ; run: %fdemote_is_nan(-NaN) == 1
 ; run: %fdemote_is_nan(+NaN:0x0) == 1
 ; run: %fdemote_is_nan(+NaN:0x1) == 1
 ; run: %fdemote_is_nan(+NaN:0x4000000000001) == 1
 ; run: %fdemote_is_nan(-NaN:0x0) == 1
 ; run: %fdemote_is_nan(-NaN:0x1) == 1
 ; run: %fdemote_is_nan(-NaN:0x4000000000001) == 1
 ; run: %fdemote_is_nan(+sNaN:0x1) == 1
 ; run: %fdemote_is_nan(-sNaN:0x1) == 1
 ; run: %fdemote_is_nan(+sNaN:0x4000000000001) == 1
 ; run: %fdemote_is_nan(-sNaN:0x4000000000001) == 1
 ;; Tests a fdemote+load combo which some backends may optimize
 ;; Writes the f64 argument to heap0 at the address heap_addr derives from the
 ;; index argument v1, reads it back with load.f64, and feeds the loaded value
 ;; directly into fdemote.f32.
 ;; v0 is the vmctx pointer; gv1, loaded from vmctx+0, is the base of heap0.
 function %fdemote_load(i64 vmctx, i64, f64) -> f32 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64
 block0(v0: i64, v1: i64, v2: f64):
    v3 = heap_addr.i64 heap0, v1, 0, 8
    store.f64 v2, v3
    v4 = load.f64 v3
    v5 = fdemote.f32 v4
    return v5
 }
 ; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8
 ; run: %fdemote_load(0, 0x0.0) == 0x0.0
 ; run: %fdemote_load(1, 0x0.1) == 0x0.1
 ; run: %fdemote_load(2, 0x0.2) == 0x0.2
 ; run: %fdemote_load(3, 0x3.2) == 0x3.2
 ; run: %fdemote_load(0x8, 0x3.2) == 0x3.2
--- a/cranelift/filetests/filetests/runtests/fpromote.clif
+++ b/cranelift/filetests/filetests/runtests/fpromote.clif
@@ -0,0 +1,100 @@
 test interpret
 test run
 target x86_64
 target s390x
 target aarch64
 target riscv64
 ;; Minimal fpromote wrapper: widens the f32 argument to f64 with fpromote.f64 and
 ;; returns the result unchanged, so each run directive checks the conversion of a
 ;; single input value.
 function %fpromote(f32) -> f64 {
 block0(v0: f32):
    v1 = fpromote.f64 v0
    return v1
 }
 ; run: %fpromote(0x0.0) == 0x0.0
 ; run: %fpromote(-0x0.0) == -0x0.0
 ; run: %fpromote(0x0.1) == 0x0.1
 ; run: %fpromote(0x0.2) == 0x0.2
 ; run: %fpromote(0x3.2) == 0x3.2
 ; run: %fpromote(0x1.5) == 0x1.5
 ; run: %fpromote(0x1.1p10) == 0x1.1p10
 ; run: %fpromote(0x1.4cccccp0) == 0x1.4cccccp0
 ; run: %fpromote(0x1.b33334p0) == 0x1.b33334p0
 ; run: %fpromote(-0x1.b33334p0) == -0x1.b33334p0
 ; run: %fpromote(0x1.333334p-1) == 0x1.333334p-1
 ; run: %fpromote(0x0.5) == 0x0.5
 ; run: %fpromote(-0x0.5) == -0x0.5
 ; run: %fpromote(0x1.5) == 0x1.5
 ; run: %fpromote(-0x1.5) == -0x1.5
 ; run: %fpromote(0x1.1p10) == 0x1.1p10
 ; run: %fpromote(-0x1.1p10) == -0x1.1p10
 ; run: %fpromote(0x1.99999ap-2) == 0x1.99999ap-2
 ; run: %fpromote(-0x1.99999ap-2) == -0x1.99999ap-2
 ; run: %fpromote(0x1.8p0) == 0x1.8p0
 ; run: %fpromote(-0x1.8p0) == -0x1.8p0
 ; run: %fpromote(0x1.4p1) == 0x1.4p1
 ; run: %fpromote(-0x1.4p1) == -0x1.4p1
 ;; Inf
 ; run: %fpromote(Inf) == Inf
 ; run: %fpromote(-Inf) == -Inf
 ;; Epsilon / Max / Min Positive
 ; run: %fpromote(0x1.000000p-23) == 0x1.000000p-23
 ; run: %fpromote(-0x1.000000p-23) == -0x1.000000p-23
 ; run: %fpromote(0x1.fffffep127) == 0x1.fffffep127
 ; run: %fpromote(-0x1.fffffep127) == -0x1.fffffep127
 ; run: %fpromote(0x1.000000p-126) == 0x1.000000p-126
 ; run: %fpromote(-0x1.000000p-126) == -0x1.000000p-126
 ;; Subnormals
 ; run: %fpromote(0x0.800000p-126) == 0x0.800000p-126
 ; run: %fpromote(-0x0.800000p-126) == -0x0.800000p-126
 ; run: %fpromote(0x0.000002p-126) == 0x0.000002p-126
 ; run: %fpromote(-0x0.000002p-126) == -0x0.000002p-126
 ;; NaN's
 ; For NaN's this operation is specified as producing a value that is a NaN
 ;; Widens the f32 argument to f64 and compares the result against itself with
 ;; `fcmp ne`. A float is unequal to itself only when it is a NaN, so the i8 result
 ;; is 1 exactly when fpromote produced a NaN. Only NaN-ness is observed; the sign
 ;; and payload bits of the result are not checked.
 function %fpromote_is_nan(f32) -> i8 {
 block0(v0: f32):
    v1 = fpromote.f64 v0
    v2 = fcmp ne v1, v1
    return v2
 }
 ; run: %fpromote_is_nan(+NaN) == 1
 ; run: %fpromote_is_nan(-NaN) == 1
 ; run: %fpromote_is_nan(+NaN:0x0) == 1
 ; run: %fpromote_is_nan(+NaN:0x1) == 1
 ; run: %fpromote_is_nan(+NaN:0x300001) == 1
 ; run: %fpromote_is_nan(-NaN:0x0) == 1
 ; run: %fpromote_is_nan(-NaN:0x1) == 1
 ; run: %fpromote_is_nan(-NaN:0x300001) == 1
 ; run: %fpromote_is_nan(+sNaN:0x1) == 1
 ; run: %fpromote_is_nan(-sNaN:0x1) == 1
 ; run: %fpromote_is_nan(+sNaN:0x200001) == 1
 ; run: %fpromote_is_nan(-sNaN:0x200001) == 1
 ;; Tests a fpromote+load combo which some backends may optimize
 ;; Writes the f32 argument to heap0 at the address heap_addr derives from the
 ;; index argument v1, reads it back with load.f32, and feeds the loaded value
 ;; directly into fpromote.f64.
 ;; v0 is the vmctx pointer; gv1, loaded from vmctx+0, is the base of heap0.
 function %fpromote_load(i64 vmctx, i64, f32) -> f64 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64
 block0(v0: i64, v1: i64, v2: f32):
    v3 = heap_addr.i64 heap0, v1, 0, 4
    store.f32 v2, v3
    v4 = load.f32 v3
    v5 = fpromote.f64 v4
    return v5
 }
 ; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8
 ; run: %fpromote_load(0, 0x0.0) == 0x0.0
 ; run: %fpromote_load(1, 0x0.1) == 0x0.1
 ; run: %fpromote_load(2, 0x0.2) == 0x0.2
 ; run: %fpromote_load(3, 0x3.2) == 0x3.2
 ; run: %fpromote_load(0xC, 0x3.2) == 0x3.2
--- a/cranelift/filetests/filetests/runtests/simd-fvdemote.clif
+++ b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif
@@ -0,0 +1,25 @@
 test interpret
 test run
 target x86_64
 target s390x
 target aarch64
 ;; Stores the f64x2 argument into heap0 at the address heap_addr derives from the
 ;; index argument v1, reloads it, and applies fvdemote to the loaded vector,
 ;; returning the f32x4 result.
 ;; v0 is the vmctx pointer; gv1, loaded from vmctx+0, is the base of heap0.
 function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64
 block0(v0: i64, v1: i64, v2: f64x2):
    v3 = heap_addr.i64 heap0, v1, 0, 16
    store.f64x2 v2, v3
    v4 = load.f64x2 v3
    v5 = fvdemote v4
    return v5
 }
 ; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8
 ; run: %fvdemote_test(0, [0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
 ; run: %fvdemote_test(1, [0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0]
 ; run: %fvdemote_test(2, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
 ; run: %fvdemote_test(8, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
 ; run: %fvdemote_test(16, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
--- a/cranelift/filetests/filetests/runtests/simd-fvpromote_low.clif
+++ b/cranelift/filetests/filetests/runtests/simd-fvpromote_low.clif
@@ -0,0 +1,26 @@
 test interpret
 test run
 target x86_64
 target s390x
 target aarch64
 ;; Stores the f32x4 argument into heap0 at the address heap_addr derives from the
 ;; index argument v1, reloads it, and applies fvpromote_low to the loaded vector,
 ;; returning the f64x2 result.
 ;; v0 is the vmctx pointer; gv1, loaded from vmctx+0, is the base of heap0.
 function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+0
    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64
 block0(v0: i64, v1: i64, v2: f32x4):
    v3 = heap_addr.i64 heap0, v1, 0, 16
    store.f32x4 v2, v3
    v4 = load.f32x4 v3
    v5 = fvpromote_low v4
    return v5
 }
 ; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8
 ; run: %fvpromote_low_test(0, [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0]
 ; run: %fvpromote_low_test(1, [0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2]
 ; run: %fvpromote_low_test(2, [0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2]
 ; run: %fvpromote_low_test(5, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
 ; run: %fvpromote_low_test(16, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
--- a/cranelift/fuzzgen/src/function_generator.rs
+++ b/cranelift/fuzzgen/src/function_generator.rs
@@ -831,6 +831,10 @@ const OPCODE_SIGNATURES: &'static [(
    // Nearest
    (Opcode::Nearest, &[F32], &[F32], insert_opcode),
    (Opcode::Nearest, &[F64], &[F64], insert_opcode),
    // Fpromote
    (Opcode::Fpromote, &[F32], &[F64], insert_opcode),
    // Fdemote
    (Opcode::Fdemote, &[F64], &[F32], insert_opcode),
    // FcvtToUint
    // TODO: Some ops disabled:
    //   x64: https://github.com/bytecodealliance/wasmtime/issues/4897
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -445,15 +445,7 @@ impl Value for DataValue {
                _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
            },
            ValueConversionKind::RoundNearestEven(ty) => match (self, ty) {
-                (DataValue::F64(n), types::F32) => {
+                (DataValue::F64(n), types::F32) => DataValue::F32(Ieee32::from(n.as_f64() as f32)),
-                    let mut x = n.as_f64() as f32;
-                    // Rust rounds away from zero, so if we've rounded up we
-                    // should replace this with a proper rounding tied to even.
-                    if (x as f64) != n.as_f64() {
-                        x = n.round_ties_even().as_f64() as f32;
-                    }
-                    DataValue::F32(x.into())
-                }
                (s, _) => unimplemented!("conversion: {} -> {:?}", s.ty(), kind),
            },
            ValueConversionKind::ToBoolean => match self.ty() {