diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs
index 2737099cc4..fdc5ad8bf8 100644
--- a/cranelift/codegen/meta/src/cdsl/instructions.rs
+++ b/cranelift/codegen/meta/src/cdsl/instructions.rs
@@ -614,6 +614,12 @@ pub enum FormatPredicateKind {
     /// Is the immediate format field member equal to zero? (float64 version)
     IsZero64BitFloat,
 
+    /// Is the immediate format field member equal zero in all lanes?
+    IsAllZeroes128Bit,
+
+    /// Does the immediate format field member have ones in all bits of all lanes?
+    IsAllOnes128Bit,
+
     /// Has the value list (in member_name) the size specified in parameter?
     LengthEquals(usize),
 
@@ -690,6 +696,14 @@ impl FormatPredicateNode {
             FormatPredicateKind::IsZero64BitFloat => {
                 format!("predicates::is_zero_64_bit_float({})", self.member_name)
             }
+            FormatPredicateKind::IsAllZeroes128Bit => format!(
+                "predicates::is_all_zeroes_128_bit(func.dfg.constants.get({}))",
+                self.member_name
+            ),
+            FormatPredicateKind::IsAllOnes128Bit => format!(
+                "predicates::is_all_ones_128_bit(func.dfg.constants.get({}))",
+                self.member_name
+            ),
             FormatPredicateKind::LengthEquals(num) => format!(
                 "predicates::has_length_of({}, {}, func)",
                 self.member_name, num
@@ -929,6 +943,28 @@ impl InstructionPredicate {
         ))
     }
 
+    pub fn new_is_all_zeroes_128bit(
+        format: &InstructionFormat,
+        field_name: &'static str,
+    ) -> InstructionPredicateNode {
+        InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+            format,
+            field_name,
+            FormatPredicateKind::IsAllZeroes128Bit,
+        ))
+    }
+
+    pub fn new_is_all_ones_128bit(
+        format: &InstructionFormat,
+        field_name: &'static str,
+    ) -> InstructionPredicateNode {
+        InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+            format,
+            field_name,
+            FormatPredicateKind::IsAllOnes128Bit,
+        ))
+    }
+
     pub fn new_length_equals(format: &InstructionFormat, size: usize) -> InstructionPredicateNode {
         assert!(
             format.has_value_list,
diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index d8b070b69e..6c578c458a 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -310,6 +310,20 @@ impl PerCpuModeEncodings {
         self.enc64_rec(inst, recipe, bits);
     }
 
+    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened
+    fn enc_32_64_func<T>(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        builder_closure: T,
+    ) where
+        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+    {
+        let encoding = self.make_encoding(inst.into(), template, builder_closure);
+        self.enc32.push(encoding.clone());
+        self.enc64.push(encoding);
+    }
+
     /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
     /// binding) has already happened.
     fn enc_32_64_maybe_isap(
@@ -642,6 +656,7 @@ pub(crate) fn define(
     let rec_urm_noflags = r.template("urm_noflags");
     let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");
     let rec_vconst = r.template("vconst");
+    let rec_vconst_optimized = r.template("vconst_optimized");
 
     // Predicates shorthands.
     let all_ones_funcaddrs_and_not_is_pic =
@@ -1671,7 +1686,7 @@ pub(crate) fn define(
     );
     e.enc_x86_64_instp(
         f64const,
-        rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]),
+        rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), // XORPD from SSE2
         is_zero_64_bit_float,
     );
 
@@ -1946,6 +1961,32 @@ pub(crate) fn define(
         }
     }
 
+    // SIMD vconst for special cases (all zeroes, all ones)
+    // this must be encoded prior to the MOVUPS implementation (below) so the compiler sees this
+    // encoding first
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let f_unary_const = formats.get(formats.by_name("UnaryConst"));
+        let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
+
+        let is_zero_128bit =
+            InstructionPredicate::new_is_all_zeroes_128bit(f_unary_const, "constant_handle");
+        let template = rec_vconst_optimized
+            .nonrex()
+            .opcodes(vec![0x66, 0x0f, 0xef]); // PXOR from SSE2
+        e.enc_32_64_func(instruction.clone(), template, |builder| {
+            builder.inst_predicate(is_zero_128bit)
+        });
+
+        let is_ones_128bit =
+            InstructionPredicate::new_is_all_ones_128bit(f_unary_const, "constant_handle");
+        let template = rec_vconst_optimized
+            .nonrex()
+            .opcodes(vec![0x66, 0x0f, 0x74]); // PCMPEQB from SSE2
+        e.enc_32_64_func(instruction, template, |builder| {
+            builder.inst_predicate(is_ones_128bit)
+        });
+    }
+
     // SIMD vconst using MOVUPS
     // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have
     // to guarantee that the constants are aligned when emitted and there is currently no mechanism
diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs
index b30a35e3f5..b9a10c86f6 100644
--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
@@ -2449,6 +2449,18 @@ pub(crate) fn define<'shared>(
             ),
     );
 
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("vconst_optimized", f_unary_const, 1)
+            .operands_out(vec![fpr])
+            .clobbers_flags(false)
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
+                    modrm_rr(out_reg0, out_reg0, sink);
+                "#,
+            ),
+    );
+
     recipes.add_template_recipe(
         EncodingRecipeBuilder::new("jt_base", f_branch_table_base, 5)
             .operands_out(vec![gpr])
diff --git a/cranelift/codegen/src/predicates.rs b/cranelift/codegen/src/predicates.rs
index f900546111..16672b145b 100644
--- a/cranelift/codegen/src/predicates.rs
+++ b/cranelift/codegen/src/predicates.rs
@@ -31,6 +31,18 @@ pub fn is_zero_32_bit_float<T: Into<ir::immediates::Ieee32>>(x: T) -> bool {
     x32.bits() == 0
 }
 
+/// Check that a 128-bit vector contains all zeroes.
+#[allow(dead_code)]
+pub fn is_all_zeroes_128_bit<'b, T: PartialEq<&'b [u8; 16]>>(x: T) -> bool {
+    x.eq(&&[0; 16])
+}
+
+/// Check that a 128-bit vector contains all ones.
+#[allow(dead_code)]
+pub fn is_all_ones_128_bit<'b, T: PartialEq<&'b [u8; 16]>>(x: T) -> bool {
+    x.eq(&&[0xff; 16])
+}
+
 /// Check that `x` is the same as `y`.
 #[allow(dead_code)]
 pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
@@ -109,4 +121,19 @@ mod tests {
         assert!(!is_signed_int(x1, 16, 4));
         assert!(!is_signed_int(x2, 16, 4));
     }
+
+    #[test]
+    fn is_all_zeroes() {
+        assert!(is_all_zeroes_128_bit(&[0; 16]));
+        assert!(is_all_zeroes_128_bit(vec![
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+        ]));
+        assert!(!is_all_zeroes_128_bit(&[1; 16]));
+    }
+
+    #[test]
+    fn is_all_ones() {
+        assert!(!is_all_ones_128_bit(&[0; 16]));
+        assert!(is_all_ones_128_bit(&[0xff; 16]));
+    }
 }
diff --git a/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif b/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif
index b07dc0fd4e..2d6f862679 100644
--- a/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif
+++ b/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif
@@ -5,7 +5,7 @@ target x86_64
 
 function %test_vconst_b8() {
 ebb0:
-[-, %xmm2]  v0 = vconst.b8x16 0x00  ; bin: 0f 10 15 00000008 PCRelRodata4(15)
-[-, %xmm3]  v1 = vconst.b8x16 0x01  ; bin: 0f 10 1d 00000011 PCRelRodata4(31)
+[-, %xmm2]  v0 = vconst.b8x16 0x01  ; bin: 0f 10 15 00000008 PCRelRodata4(15)
+[-, %xmm3]  v1 = vconst.b8x16 0x02  ; bin: 0f 10 1d 00000011 PCRelRodata4(31)
             return
 }
diff --git a/cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif b/cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif
new file mode 100644
index 0000000000..487ff4f844
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif
@@ -0,0 +1,23 @@
+test run
+set enable_simd
+target x86_64
+
+; TODO move to vconst-run.clif
+
+function %test_vconst_zeroes() -> b1 {
+ebb0:
+    v0 = vconst.i8x16 0x00
+    v1 = extractlane v0, 4
+    v2 = icmp_imm eq v1, 0
+    return v2
+}
+; run
+
+function %test_vconst_ones() -> b1 {
+ebb0:
+    v0 = vconst.i8x16 0xffffffffffffffffffffffffffffffff
+    v1 = extractlane v0, 2
+    v2 = icmp_imm eq v1, 0xff
+    return v2
+}
+; run
diff --git a/cranelift/filetests/filetests/isa/x86/vconst-opt.clif b/cranelift/filetests/filetests/isa/x86/vconst-opt.clif
new file mode 100644
index 0000000000..4daeed8abe
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/vconst-opt.clif
@@ -0,0 +1,12 @@
+test binemit
+set enable_simd
+target x86_64
+
+; TODO move to vconst-compile.clif or vconst-binemit.clif
+
+function %test_vconst_optimizations() {
+ebb0:
+[-, %xmm4]  v0 = vconst.b8x16 0x00                                  ; bin: 66 0f ef e4
+[-, %xmm7]  v1 = vconst.b8x16 0xffffffffffffffffffffffffffffffff    ; bin: 66 0f 74 ff
+            return
+}