diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 4f74ac18c9..f6aad71b72 100755 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -3482,8 +3482,7 @@ pub(crate) fn define( - `f32` and `f64`. This may change in the future. The result type must have the same number of vector lanes as the input, - and the result lanes must not have fewer bits than the input lanes. If - the input and output types are the same, this is a no-op. + and the result lanes must not have fewer bits than the input lanes. "#, &formats.unary, ) @@ -3504,8 +3503,7 @@ pub(crate) fn define( - `f32` and `f64`. This may change in the future. The result type must have the same number of vector lanes as the input, - and the result lanes must not have more bits than the input lanes. If - the input and output types are the same, this is a no-op. + and the result lanes must not have more bits than the input lanes. "#, &formats.unary, ) diff --git a/cranelift/filetests/filetests/runtests/conversions-load-store.clif b/cranelift/filetests/filetests/runtests/conversions-load-store.clif deleted file mode 100644 index 78abe5ba67..0000000000 --- a/cranelift/filetests/filetests/runtests/conversions-load-store.clif +++ /dev/null @@ -1,87 +0,0 @@ -test run - -target x86_64 -target s390x -target aarch64 -;; target riscv64 vector type not supported. - -function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0+0 - heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64 - -block0(v0: i64, v1: i64, v2: f32): - v3 = heap_addr.i64 heap0, v1, 0, 4 - store.f32 v2, v3 - v4 = load.f32 v3 - v5 = fpromote.f64 v4 - return v5 -} - -; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8 -; run: %fpromote_f32_f64(0, 0x0.0) == 0x0.0 -; run: %fpromote_f32_f64(1, 0x0.1) == 0x0.1 -; run: %fpromote_f32_f64(2, 0x0.2) == 0x0.2 -; run: %fpromote_f32_f64(3, 0x3.2) == 0x3.2 -; run: %fpromote_f32_f64(0xc, 0x3.2) == 0x3.2 - -function %fdemote_test(i64 vmctx, i64, f64) -> f32 { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0+0 - heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64 - -block0(v0: i64, v1: i64, v2: f64): - v3 = heap_addr.i64 heap0, v1, 0, 8 - store.f64 v2, v3 - v4 = load.f64 v3 - v5 = fdemote.f32 v4 - return v5 -} - -; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8 -; run: %fdemote_test(0, 0x0.0) == 0x0.0 -; run: %fdemote_test(1, 0x0.1) == 0x0.1 -; run: %fdemote_test(2, 0x0.2) == 0x0.2 -; run: %fdemote_test(3, 0x3.2) == 0x3.2 -; run: %fdemote_test(0x8, 0x3.2) == 0x3.2 - -function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0+0 - heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64 - -block0(v0: i64, v1: i64, v2: f64x2): - v3 = heap_addr.i64 heap0, v1, 0, 16 - store.f64x2 v2, v3 - v4 = load.f64x2 v3 - v5 = fvdemote v4 - return v5 -} - -; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8 -; run: %fvdemote_test(0, [0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0] -; run: %fvdemote_test(1, [0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0] -; run: %fvdemote_test(2, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] -; run: %fvdemote_test(8, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] -; run: %fvdemote_test(16, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] - - -function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0+0 - heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64 - -block0(v0: i64, v1: i64, v2: f32x4): - v3 = heap_addr.i64 heap0, v1, 0, 16 - store.f32x4 v2, v3 - v4 = load.f32x4 v3 - v5 = fvpromote_low v4 - return v5 -} - -; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8 -; run: %fvpromote_low_test(0, [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0] -; run: %fvpromote_low_test(1, [0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2] -; run: %fvpromote_low_test(2, [0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2] -; run: %fvpromote_low_test(5, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0] -; run: %fvpromote_low_test(16, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0] diff --git a/cranelift/filetests/filetests/runtests/fdemote.clif b/cranelift/filetests/filetests/runtests/fdemote.clif new file mode 100644 index 0000000000..7923cca5ed --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fdemote.clif @@ -0,0 +1,91 @@ +test interpret +test run +target x86_64 +target s390x +target aarch64 +target riscv64 + + +function %fdemote(f64) -> f32 { +block0(v0: f64): + v1 = fdemote.f32 v0 + return v1 +} +; run: %fdemote(0x0.0) == 0x0.0 +; run: %fdemote(-0x0.0) == -0x0.0 +; run: %fdemote(0x0.1) == 0x0.1 +; run: %fdemote(0x0.2) == 0x0.2 +; run: %fdemote(0x0.5) == 0x0.5 +; run: %fdemote(-0x0.5) == -0x0.5 +; run: %fdemote(0x3.2) == 0x3.2 +; run: %fdemote(0x9.0) == 0x9.0 +; run: %fdemote(-0x9.0) == -0x9.0 +; run: %fdemote(0x1.1p10) == 0x1.100000p10 +; run: %fdemote(-0x1.1p10) == -0x1.100000p10 +; run: %fdemote(0x1.c555555555556p10) == 0x1.c55556p10 +; run: %fdemote(-0x1.999999999999ap-2) == -0x1.99999ap-2 +; run: %fdemote(0x1.c3c3c3c3c3c3cp-1) == 0x1.c3c3c4p-1 +; run: %fdemote(0x1.c924924924925p-1) == 0x1.c92492p-1 +; run: %fdemote(0x1.4cccccccccccdp0) == 0x1.4cccccp0 + + +;; Inf +; run: %fdemote(Inf) == Inf +; run: %fdemote(-Inf) == -Inf + +;; Epsilon / Max / Min Positive +; run: %fdemote(0x1.0000000000000p-52) == 0x1.0000000000000p-52 +; run: %fdemote(-0x1.0000000000000p-52) == -0x1.0000000000000p-52 +; run: %fdemote(0x1.fffffffffffffp1023) == +Inf +; run: %fdemote(-0x1.fffffffffffffp1023) == -Inf +; run: %fdemote(0x1.0000000000000p-1022) == 0x0.0 +; run: %fdemote(-0x1.0000000000000p-1022) == -0x0.0 + +;; Subnormals +; run: %fdemote(0x0.8000000000000p-1022) == 0x0.0 +; run: %fdemote(-0x0.8000000000000p-1022) == -0x0.0 +; run: %fdemote(0x0.0000000000001p-1022) == 0x0.0 +; run: %fdemote(-0x0.0000000000001p-1022) == -0x0.0 + + +;; NaN's +; For NaN's this operation is specified as producing a value that is a NaN +function %fdemote_is_nan(f64) -> i8 { +block0(v0: f64): + v1 = fdemote.f32 v0 + v2 = fcmp ne v1, v1 + return v2 +} +; run: %fdemote_is_nan(+NaN) == 1 +; run: %fdemote_is_nan(-NaN) == 1 +; run: %fdemote_is_nan(+NaN:0x0) == 1 +; run: %fdemote_is_nan(+NaN:0x1) == 1 +; run: %fdemote_is_nan(+NaN:0x4000000000001) == 1 +; run: %fdemote_is_nan(-NaN:0x0) == 1 +; run: %fdemote_is_nan(-NaN:0x1) == 1 +; run: %fdemote_is_nan(-NaN:0x4000000000001) == 1 +; run: %fdemote_is_nan(+sNaN:0x1) == 1 +; run: %fdemote_is_nan(-sNaN:0x1) == 1 +; run: %fdemote_is_nan(+sNaN:0x4000000000001) == 1 +; run: %fdemote_is_nan(-sNaN:0x4000000000001) == 1 + + +;; Tests a fdemote+load combo which some backends may optimize +function %fdemote_load(i64 vmctx, i64, f64) -> f32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64 + +block0(v0: i64, v1: i64, v2: f64): + v3 = heap_addr.i64 heap0, v1, 0, 8 + store.f64 v2, v3 + v4 = load.f64 v3 + v5 = fdemote.f32 v4 + return v5 +} +; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8 +; run: %fdemote_load(0, 0x0.0) == 0x0.0 +; run: %fdemote_load(1, 0x0.1) == 0x0.1 +; run: %fdemote_load(2, 0x0.2) == 0x0.2 +; run: %fdemote_load(3, 0x3.2) == 0x3.2 +; run: %fdemote_load(0x8, 0x3.2) == 0x3.2 diff --git a/cranelift/filetests/filetests/runtests/fpromote.clif b/cranelift/filetests/filetests/runtests/fpromote.clif new file mode 100644 index 0000000000..7a6170e1c8 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fpromote.clif @@ -0,0 +1,100 @@ +test interpret +test run +target x86_64 +target s390x +target aarch64 +target riscv64 + + +function %fpromote(f32) -> f64 { +block0(v0: f32): + v1 = fpromote.f64 v0 + return v1 +} + +; run: %fpromote(0x0.0) == 0x0.0 +; run: %fpromote(-0x0.0) == -0x0.0 +; run: %fpromote(0x0.1) == 0x0.1 +; run: %fpromote(0x0.2) == 0x0.2 +; run: %fpromote(0x3.2) == 0x3.2 +; run: %fpromote(0x1.5) == 0x1.5 +; run: %fpromote(0x1.1p10) == 0x1.1p10 +; run: %fpromote(0x1.4cccccp0) == 0x1.4cccccp0 +; run: %fpromote(0x1.b33334p0) == 0x1.b33334p0 +; run: %fpromote(-0x1.b33334p0) == -0x1.b33334p0 +; run: %fpromote(0x1.333334p-1) == 0x1.333334p-1 +; run: %fpromote(0x0.5) == 0x0.5 +; run: %fpromote(-0x0.5) == -0x0.5 +; run: %fpromote(0x1.5) == 0x1.5 +; run: %fpromote(-0x1.5) == -0x1.5 +; run: %fpromote(0x1.1p10) == 0x1.1p10 +; run: %fpromote(-0x1.1p10) == -0x1.1p10 +; run: %fpromote(0x1.99999ap-2) == 0x1.99999ap-2 +; run: %fpromote(-0x1.99999ap-2) == -0x1.99999ap-2 +; run: %fpromote(0x1.8p0) == 0x1.8p0 +; run: %fpromote(-0x1.8p0) == -0x1.8p0 +; run: %fpromote(0x1.4p1) == 0x1.4p1 +; run: %fpromote(-0x1.4p1) == -0x1.4p1 + + +;; Inf +; run: %fpromote(Inf) == Inf +; run: %fpromote(-Inf) == -Inf + +;; Epsilon / Max / Min Positive +; run: %fpromote(0x1.000000p-23) == 0x1.000000p-23 +; run: %fpromote(-0x1.000000p-23) == -0x1.000000p-23 +; run: %fpromote(0x1.fffffep127) == 0x1.fffffep127 +; run: %fpromote(-0x1.fffffep127) == -0x1.fffffep127 +; run: %fpromote(0x1.000000p-126) == 0x1.000000p-126 +; run: %fpromote(-0x1.000000p-126) == -0x1.000000p-126 + +;; Subnormals +; run: %fpromote(0x0.800000p-126) == 0x0.800000p-126 +; run: %fpromote(-0x0.800000p-126) == -0x0.800000p-126 +; run: %fpromote(0x0.000002p-126) == 0x0.000002p-126 +; run: %fpromote(-0x0.000002p-126) == -0x0.000002p-126 + + +;; NaN's +; For NaN's this operation is specified as producing a value that is a NaN +function %fpromote_is_nan(f32) -> i8 { +block0(v0: f32): + v1 = fpromote.f64 v0 + v2 = fcmp ne v1, v1 + return v2 +} +; run: %fpromote_is_nan(+NaN) == 1 +; run: %fpromote_is_nan(-NaN) == 1 +; run: %fpromote_is_nan(+NaN:0x0) == 1 +; run: %fpromote_is_nan(+NaN:0x1) == 1 +; run: %fpromote_is_nan(+NaN:0x300001) == 1 +; run: %fpromote_is_nan(-NaN:0x0) == 1 +; run: %fpromote_is_nan(-NaN:0x1) == 1 +; run: %fpromote_is_nan(-NaN:0x300001) == 1 +; run: %fpromote_is_nan(+sNaN:0x1) == 1 +; run: %fpromote_is_nan(-sNaN:0x1) == 1 +; run: %fpromote_is_nan(+sNaN:0x200001) == 1 +; run: %fpromote_is_nan(-sNaN:0x200001) == 1 + + +;; Tests a fpromote+load combo which some backends may optimize +function %fpromote_load(i64 vmctx, i64, f32) -> f64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64 + +block0(v0: i64, v1: i64, v2: f32): + v3 = heap_addr.i64 heap0, v1, 0, 4 + store.f32 v2, v3 + v4 = load.f32 v3 + v5 = fpromote.f64 v4 + return v5 +} + +; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8 +; run: %fpromote_load(0, 0x0.0) == 0x0.0 +; run: %fpromote_load(1, 0x0.1) == 0x0.1 +; run: %fpromote_load(2, 0x0.2) == 0x0.2 +; run: %fpromote_load(3, 0x3.2) == 0x3.2 +; run: %fpromote_load(0xC, 0x3.2) == 0x3.2 diff --git a/cranelift/filetests/filetests/runtests/simd-fvdemote.clif b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif new file mode 100644 index 0000000000..558a346161 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif @@ -0,0 +1,25 @@ +test interpret +test run +target x86_64 +target s390x +target aarch64 + +function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64 + +block0(v0: i64, v1: i64, v2: f64x2): + v3 = heap_addr.i64 heap0, v1, 0, 16 + store.f64x2 v2, v3 + v4 = load.f64x2 v3 + v5 = fvdemote v4 + return v5 +} + +; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8 +; run: %fvdemote_test(0, [0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0] +; run: %fvdemote_test(1, [0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0] +; run: %fvdemote_test(2, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] +; run: %fvdemote_test(8, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] +; run: %fvdemote_test(16, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] diff --git a/cranelift/filetests/filetests/runtests/simd-fvpromote_low.clif b/cranelift/filetests/filetests/runtests/simd-fvpromote_low.clif new file mode 100644 index 0000000000..79dd279492 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fvpromote_low.clif @@ -0,0 +1,26 @@ +test interpret +test run +target x86_64 +target s390x +target aarch64 + + +function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64 + +block0(v0: i64, v1: i64, v2: f32x4): + v3 = heap_addr.i64 heap0, v1, 0, 16 + store.f32x4 v2, v3 + v4 = load.f32x4 v3 + v5 = fvpromote_low v4 + return v5 +} + +; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8 +; run: %fvpromote_low_test(0, [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0] +; run: %fvpromote_low_test(1, [0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2] +; run: %fvpromote_low_test(2, [0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2] +; run: %fvpromote_low_test(5, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0] +; run: %fvpromote_low_test(16, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0] diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index 4bd1522ade..b1af0ca062 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -831,6 +831,10 @@ const OPCODE_SIGNATURES: &'static [( // Nearest (Opcode::Nearest, &[F32], &[F32], insert_opcode), (Opcode::Nearest, &[F64], &[F64], insert_opcode), + // Fpromote + (Opcode::Fpromote, &[F32], &[F64], insert_opcode), + // Fdemote + (Opcode::Fdemote, &[F64], &[F32], insert_opcode), // FcvtToUint // TODO: Some ops disabled: // x64: https://github.com/bytecodealliance/wasmtime/issues/4897 diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index a5f69cf944..2262d6a06f 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -445,15 +445,7 @@ impl Value for DataValue { _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind), }, ValueConversionKind::RoundNearestEven(ty) => match (self, ty) { - (DataValue::F64(n), types::F32) => { - let mut x = n.as_f64() as f32; - // Rust rounds away from zero, so if we've rounded up we - // should replace this with a proper rounding tied to even. - if (x as f64) != n.as_f64() { - x = n.round_ties_even().as_f64() as f32; - } - DataValue::F32(x.into()) - } + (DataValue::F64(n), types::F32) => DataValue::F32(Ieee32::from(n.as_f64() as f32)), (s, _) => unimplemented!("conversion: {} -> {:?}", s.ty(), kind), }, ValueConversionKind::ToBoolean => match self.ty() {