diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 4f74ac18c9..f6aad71b72 100755
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3482,8 +3482,7 @@ pub(crate) fn define(
         - `f32` and `f64`. This may change in the future.
 
         The result type must have the same number of vector lanes as the input,
-        and the result lanes must not have fewer bits than the input lanes. If
-        the input and output types are the same, this is a no-op.
+        and the result lanes must not have fewer bits than the input lanes.
         "#,
             &formats.unary,
         )
@@ -3504,8 +3503,7 @@ pub(crate) fn define(
         - `f32` and `f64`. This may change in the future.
 
         The result type must have the same number of vector lanes as the input,
-        and the result lanes must not have more bits than the input lanes. If
-        the input and output types are the same, this is a no-op.
+        and the result lanes must not have more bits than the input lanes.
         "#,
             &formats.unary,
         )
diff --git a/cranelift/filetests/filetests/runtests/conversions-load-store.clif b/cranelift/filetests/filetests/runtests/conversions-load-store.clif
deleted file mode 100644
index 78abe5ba67..0000000000
--- a/cranelift/filetests/filetests/runtests/conversions-load-store.clif
+++ /dev/null
@@ -1,87 +0,0 @@
-test run
-
-target x86_64
-target s390x
-target aarch64
-;; target riscv64 vector type not supported.
-
-function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64
-
-block0(v0: i64, v1: i64, v2: f32):
-    v3 = heap_addr.i64 heap0, v1, 0, 4
-    store.f32 v2, v3
-    v4 = load.f32 v3
-    v5 = fpromote.f64 v4
-    return v5
-}
-
-; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8
-; run: %fpromote_f32_f64(0, 0x0.0) == 0x0.0
-; run: %fpromote_f32_f64(1, 0x0.1) == 0x0.1
-; run: %fpromote_f32_f64(2, 0x0.2) == 0x0.2
-; run: %fpromote_f32_f64(3, 0x3.2) == 0x3.2
-; run: %fpromote_f32_f64(0xc, 0x3.2) == 0x3.2
-
-function %fdemote_test(i64 vmctx, i64, f64) -> f32 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64
-
-block0(v0: i64, v1: i64, v2: f64):
-    v3 = heap_addr.i64 heap0, v1, 0, 8
-    store.f64 v2, v3
-    v4 = load.f64 v3
-    v5 = fdemote.f32 v4
-    return v5
-}
-
-; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8
-; run: %fdemote_test(0, 0x0.0) == 0x0.0
-; run: %fdemote_test(1, 0x0.1) == 0x0.1
-; run: %fdemote_test(2, 0x0.2) == 0x0.2
-; run: %fdemote_test(3, 0x3.2) == 0x3.2
-; run: %fdemote_test(0x8, 0x3.2) == 0x3.2
-
-function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64
-
-block0(v0: i64, v1: i64, v2: f64x2):
-    v3 = heap_addr.i64 heap0, v1, 0, 16
-    store.f64x2 v2, v3
-    v4 = load.f64x2 v3
-    v5 = fvdemote v4
-    return v5
-}
-
-; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8
-; run: %fvdemote_test(0, [0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
-; run: %fvdemote_test(1, [0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0]
-; run: %fvdemote_test(2, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
-; run: %fvdemote_test(8, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
-; run: %fvdemote_test(16, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
-
-
-function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0+0
-    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64
-
-block0(v0: i64, v1: i64, v2: f32x4):
-    v3 = heap_addr.i64 heap0, v1, 0, 16
-    store.f32x4 v2, v3
-    v4 = load.f32x4 v3
-    v5 = fvpromote_low v4
-    return v5
-}
-
-; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8
-; run: %fvpromote_low_test(0, [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0]
-; run: %fvpromote_low_test(1, [0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2]
-; run: %fvpromote_low_test(2, [0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2]
-; run: %fvpromote_low_test(5, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
-; run: %fvpromote_low_test(16, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
diff --git a/cranelift/filetests/filetests/runtests/fdemote.clif b/cranelift/filetests/filetests/runtests/fdemote.clif
new file mode 100644
index 0000000000..7923cca5ed
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fdemote.clif
@@ -0,0 +1,91 @@
+test interpret
+test run
+target x86_64
+target s390x
+target aarch64
+target riscv64
+
+
+function %fdemote(f64) -> f32 {
+block0(v0: f64):
+    v1 = fdemote.f32 v0
+    return v1
+}
+; run: %fdemote(0x0.0) == 0x0.0
+; run: %fdemote(-0x0.0) == -0x0.0
+; run: %fdemote(0x0.1) == 0x0.1
+; run: %fdemote(0x0.2) == 0x0.2
+; run: %fdemote(0x0.5) == 0x0.5
+; run: %fdemote(-0x0.5) == -0x0.5
+; run: %fdemote(0x3.2) == 0x3.2
+; run: %fdemote(0x9.0) == 0x9.0
+; run: %fdemote(-0x9.0) == -0x9.0
+; run: %fdemote(0x1.1p10) == 0x1.100000p10
+; run: %fdemote(-0x1.1p10) == -0x1.100000p10
+; run: %fdemote(0x1.c555555555556p10) == 0x1.c55556p10
+; run: %fdemote(-0x1.999999999999ap-2) == -0x1.99999ap-2
+; run: %fdemote(0x1.c3c3c3c3c3c3cp-1) == 0x1.c3c3c4p-1
+; run: %fdemote(0x1.c924924924925p-1) == 0x1.c92492p-1
+; run: %fdemote(0x1.4cccccccccccdp0) == 0x1.4cccccp0
+
+
+;; Inf
+; run: %fdemote(Inf) == Inf
+; run: %fdemote(-Inf) == -Inf
+
+;; f64 Epsilon / Max / Min Positive (Max overflows to Inf, Min Positive underflows to zero)
+; run: %fdemote(0x1.0000000000000p-52) == 0x1.0000000000000p-52
+; run: %fdemote(-0x1.0000000000000p-52) == -0x1.0000000000000p-52
+; run: %fdemote(0x1.fffffffffffffp1023) == +Inf
+; run: %fdemote(-0x1.fffffffffffffp1023) == -Inf
+; run: %fdemote(0x1.0000000000000p-1022) == 0x0.0
+; run: %fdemote(-0x1.0000000000000p-1022) == -0x0.0
+
+;; f64 subnormals (too small for f32, so they demote to a zero of the same sign)
+; run: %fdemote(0x0.8000000000000p-1022) == 0x0.0
+; run: %fdemote(-0x0.8000000000000p-1022) == -0x0.0
+; run: %fdemote(0x0.0000000000001p-1022) == 0x0.0
+; run: %fdemote(-0x0.0000000000001p-1022) == -0x0.0
+
+
+;; NaNs
+; For NaN inputs, this operation is specified as producing a value that is a NaN, so we check `v1 != v1`
+function %fdemote_is_nan(f64) -> i8 {
+block0(v0: f64):
+    v1 = fdemote.f32 v0
+    v2 = fcmp ne v1, v1
+    return v2
+}
+; run: %fdemote_is_nan(+NaN) == 1
+; run: %fdemote_is_nan(-NaN) == 1
+; run: %fdemote_is_nan(+NaN:0x0) == 1
+; run: %fdemote_is_nan(+NaN:0x1) == 1
+; run: %fdemote_is_nan(+NaN:0x4000000000001) == 1
+; run: %fdemote_is_nan(-NaN:0x0) == 1
+; run: %fdemote_is_nan(-NaN:0x1) == 1
+; run: %fdemote_is_nan(-NaN:0x4000000000001) == 1
+; run: %fdemote_is_nan(+sNaN:0x1) == 1
+; run: %fdemote_is_nan(-sNaN:0x1) == 1
+; run: %fdemote_is_nan(+sNaN:0x4000000000001) == 1
+; run: %fdemote_is_nan(-sNaN:0x4000000000001) == 1
+
+
+;; Tests an fdemote applied directly to a loaded value, a combination which some backends may optimize
+function %fdemote_load(i64 vmctx, i64, f64) -> f32 {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned gv0+0
+    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64
+
+block0(v0: i64, v1: i64, v2: f64):
+    v3 = heap_addr.i64 heap0, v1, 0, 8
+    store.f64 v2, v3
+    v4 = load.f64 v3
+    v5 = fdemote.f32 v4
+    return v5
+}
+; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8
+; run: %fdemote_load(0, 0x0.0) == 0x0.0
+; run: %fdemote_load(1, 0x0.1) == 0x0.1
+; run: %fdemote_load(2, 0x0.2) == 0x0.2
+; run: %fdemote_load(3, 0x3.2) == 0x3.2
+; run: %fdemote_load(0x8, 0x3.2) == 0x3.2
diff --git a/cranelift/filetests/filetests/runtests/fpromote.clif b/cranelift/filetests/filetests/runtests/fpromote.clif
new file mode 100644
index 0000000000..7a6170e1c8
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fpromote.clif
@@ -0,0 +1,100 @@
+test interpret
+test run
+target x86_64
+target s390x
+target aarch64
+target riscv64
+
+
+function %fpromote(f32) -> f64 {
+block0(v0: f32):
+    v1 = fpromote.f64 v0
+    return v1
+}
+
+; run: %fpromote(0x0.0) == 0x0.0
+; run: %fpromote(-0x0.0) == -0x0.0
+; run: %fpromote(0x0.1) == 0x0.1
+; run: %fpromote(0x0.2) == 0x0.2
+; run: %fpromote(0x3.2) == 0x3.2
+; run: %fpromote(0x1.5) == 0x1.5
+; run: %fpromote(0x1.1p10) == 0x1.1p10
+; run: %fpromote(0x1.4cccccp0) == 0x1.4cccccp0
+; run: %fpromote(0x1.b33334p0) == 0x1.b33334p0
+; run: %fpromote(-0x1.b33334p0) == -0x1.b33334p0
+; run: %fpromote(0x1.333334p-1) == 0x1.333334p-1
+; run: %fpromote(0x0.5) == 0x0.5
+; run: %fpromote(-0x0.5) == -0x0.5
+; run: %fpromote(0x1.5) == 0x1.5
+; run: %fpromote(-0x1.5) == -0x1.5
+; run: %fpromote(0x1.1p10) == 0x1.1p10
+; run: %fpromote(-0x1.1p10) == -0x1.1p10
+; run: %fpromote(0x1.99999ap-2) == 0x1.99999ap-2
+; run: %fpromote(-0x1.99999ap-2) == -0x1.99999ap-2
+; run: %fpromote(0x1.8p0) == 0x1.8p0
+; run: %fpromote(-0x1.8p0) == -0x1.8p0
+; run: %fpromote(0x1.4p1) == 0x1.4p1
+; run: %fpromote(-0x1.4p1) == -0x1.4p1
+
+
+;; Inf
+; run: %fpromote(Inf) == Inf
+; run: %fpromote(-Inf) == -Inf
+
+;; f32 Epsilon / Max / Min Positive (all exactly representable in f64)
+; run: %fpromote(0x1.000000p-23) == 0x1.000000p-23
+; run: %fpromote(-0x1.000000p-23) == -0x1.000000p-23
+; run: %fpromote(0x1.fffffep127) == 0x1.fffffep127
+; run: %fpromote(-0x1.fffffep127) == -0x1.fffffep127
+; run: %fpromote(0x1.000000p-126) == 0x1.000000p-126
+; run: %fpromote(-0x1.000000p-126) == -0x1.000000p-126
+
+;; f32 subnormals (exactly representable in f64, so the value is preserved)
+; run: %fpromote(0x0.800000p-126) == 0x0.800000p-126
+; run: %fpromote(-0x0.800000p-126) == -0x0.800000p-126
+; run: %fpromote(0x0.000002p-126) == 0x0.000002p-126
+; run: %fpromote(-0x0.000002p-126) == -0x0.000002p-126
+
+
+;; NaNs
+; For NaN inputs, this operation is specified as producing a value that is a NaN, so we check `v1 != v1`
+function %fpromote_is_nan(f32) -> i8 {
+block0(v0: f32):
+    v1 = fpromote.f64 v0
+    v2 = fcmp ne v1, v1
+    return v2
+}
+; run: %fpromote_is_nan(+NaN) == 1
+; run: %fpromote_is_nan(-NaN) == 1
+; run: %fpromote_is_nan(+NaN:0x0) == 1
+; run: %fpromote_is_nan(+NaN:0x1) == 1
+; run: %fpromote_is_nan(+NaN:0x300001) == 1
+; run: %fpromote_is_nan(-NaN:0x0) == 1
+; run: %fpromote_is_nan(-NaN:0x1) == 1
+; run: %fpromote_is_nan(-NaN:0x300001) == 1
+; run: %fpromote_is_nan(+sNaN:0x1) == 1
+; run: %fpromote_is_nan(-sNaN:0x1) == 1
+; run: %fpromote_is_nan(+sNaN:0x200001) == 1
+; run: %fpromote_is_nan(-sNaN:0x200001) == 1
+
+
+;; Tests an fpromote applied directly to a loaded value, a combination which some backends may optimize
+function %fpromote_load(i64 vmctx, i64, f32) -> f64 {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned gv0+0
+    heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64
+
+block0(v0: i64, v1: i64, v2: f32):
+    v3 = heap_addr.i64 heap0, v1, 0, 4
+    store.f32 v2, v3
+    v4 = load.f32 v3
+    v5 = fpromote.f64 v4
+    return v5
+}
+
+; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8
+; run: %fpromote_load(0, 0x0.0) == 0x0.0
+; run: %fpromote_load(1, 0x0.1) == 0x0.1
+; run: %fpromote_load(2, 0x0.2) == 0x0.2
+; run: %fpromote_load(3, 0x3.2) == 0x3.2
+; run: %fpromote_load(0xC, 0x3.2) == 0x3.2
diff --git a/cranelift/filetests/filetests/runtests/simd-fvdemote.clif b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif
new file mode 100644
index 0000000000..558a346161
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif
@@ -0,0 +1,25 @@
+test interpret
+test run
+target x86_64
+target s390x
+target aarch64
+
+function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned gv0+0
+    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64
+
+block0(v0: i64, v1: i64, v2: f64x2):
+    v3 = heap_addr.i64 heap0, v1, 0, 16
+    store.f64x2 v2, v3
+    v4 = load.f64x2 v3
+    v5 = fvdemote v4
+    return v5
+}
+
+; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8
+; run: %fvdemote_test(0, [0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
+; run: %fvdemote_test(1, [0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0]
+; run: %fvdemote_test(2, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
+; run: %fvdemote_test(8, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
+; run: %fvdemote_test(16, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
diff --git a/cranelift/filetests/filetests/runtests/simd-fvpromote_low.clif b/cranelift/filetests/filetests/runtests/simd-fvpromote_low.clif
new file mode 100644
index 0000000000..79dd279492
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fvpromote_low.clif
@@ -0,0 +1,26 @@
+test interpret
+test run
+target x86_64
+target s390x
+target aarch64
+
+
+function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned gv0+0
+    heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64
+
+block0(v0: i64, v1: i64, v2: f32x4):
+    v3 = heap_addr.i64 heap0, v1, 0, 16
+    store.f32x4 v2, v3
+    v4 = load.f32x4 v3
+    v5 = fvpromote_low v4
+    return v5
+}
+
+; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8
+; run: %fvpromote_low_test(0, [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0]
+; run: %fvpromote_low_test(1, [0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2]
+; run: %fvpromote_low_test(2, [0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2]
+; run: %fvpromote_low_test(5, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
+; run: %fvpromote_low_test(16, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs
index 4bd1522ade..b1af0ca062 100644
--- a/cranelift/fuzzgen/src/function_generator.rs
+++ b/cranelift/fuzzgen/src/function_generator.rs
@@ -831,6 +831,10 @@ const OPCODE_SIGNATURES: &'static [(
     // Nearest
     (Opcode::Nearest, &[F32], &[F32], insert_opcode),
     (Opcode::Nearest, &[F64], &[F64], insert_opcode),
+    // Fpromote
+    (Opcode::Fpromote, &[F32], &[F64], insert_opcode),
+    // Fdemote
+    (Opcode::Fdemote, &[F64], &[F32], insert_opcode),
     // FcvtToUint
     // TODO: Some ops disabled:
     //   x64: https://github.com/bytecodealliance/wasmtime/issues/4897
diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs
index a5f69cf944..2262d6a06f 100644
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -445,15 +445,7 @@ impl Value for DataValue {
                 _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
             },
             ValueConversionKind::RoundNearestEven(ty) => match (self, ty) {
-                (DataValue::F64(n), types::F32) => {
-                    let mut x = n.as_f64() as f32;
-                    // Rust rounds away from zero, so if we've rounded up we
-                    // should replace this with a proper rounding tied to even.
-                    if (x as f64) != n.as_f64() {
-                        x = n.round_ties_even().as_f64() as f32;
-                    }
-                    DataValue::F32(x.into())
-                }
+                (DataValue::F64(n), types::F32) => DataValue::F32(Ieee32::from(n.as_f64() as f32)),
                 (s, _) => unimplemented!("conversion: {} -> {:?}", s.ty(), kind),
             },
             ValueConversionKind::ToBoolean => match self.ty() {