diff --git a/build.rs b/build.rs
index 49f9f81db7..9864d44570 100644
--- a/build.rs
+++ b/build.rs
@@ -182,6 +182,7 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
     match (testsuite, testname) {
         ("simd", "simd_address") => return false,
         ("simd", "simd_bitwise") => return false,
+        ("simd", "simd_boolean") => return false,
         ("simd", "simd_const") => return false,
         ("simd", "simd_i8x16_arith") => return false,
         ("simd", "simd_i8x16_arith2") => return false,
@@ -229,9 +230,14 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
                 return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64";
             }
 
+            // These are only implemented on aarch64 and the experimental x64 backend.
+            ("simd", "simd_boolean") => {
+                return !(cfg!(feature = "experimental_x64")
+                    || env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "aarch64")
+            }
+
             // These are only implemented on aarch64.
-            ("simd", "simd_boolean")
-            | ("simd", "simd_f32x4_pmin_pmax")
+            ("simd", "simd_f32x4_pmin_pmax")
             | ("simd", "simd_f32x4_rounding")
             | ("simd", "simd_f64x2_pmin_pmax")
             | ("simd", "simd_f64x2_rounding") => {
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index 48bd822d4d..708e52f36b 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -393,6 +393,8 @@ pub enum SseOpcode {
     Movdqa,
     Movdqu,
     Movlhps,
+    Movmskps,
+    Movmskpd,
     Movq,
     Movss,
     Movsd,
@@ -407,6 +409,7 @@ pub enum SseOpcode {
     Pabsb,
     Pabsw,
     Pabsd,
+    Packsswb,
     Paddb,
     Paddd,
     Paddq,
@@ -445,6 +448,7 @@ pub enum SseOpcode {
     Pminub,
     Pminuw,
     Pminud,
+    Pmovmskb,
     Pmulld,
     Pmullw,
     Pmuludq,
@@ -510,6 +514,7 @@ impl SseOpcode {
             | SseOpcode::Minss
             | SseOpcode::Movaps
             | SseOpcode::Movlhps
+            | SseOpcode::Movmskps
             | SseOpcode::Movss
             | SseOpcode::Movups
             | SseOpcode::Mulps
@@ -546,6 +551,7 @@ impl SseOpcode {
             | SseOpcode::Minsd
             | SseOpcode::Movapd
             | SseOpcode::Movd
+            | SseOpcode::Movmskpd
             | SseOpcode::Movq
             | SseOpcode::Movsd
             | SseOpcode::Movupd
@@ -554,6 +560,7 @@ impl SseOpcode {
             | SseOpcode::Mulpd
             | SseOpcode::Mulsd
             | SseOpcode::Orpd
+            | SseOpcode::Packsswb
             | SseOpcode::Paddb
             | SseOpcode::Paddd
             | SseOpcode::Paddq
@@ -578,6 +585,7 @@ impl SseOpcode {
             | SseOpcode::Pmaxub
             | SseOpcode::Pminsw
             | SseOpcode::Pminub
+            | SseOpcode::Pmovmskb
             | SseOpcode::Pmullw
             | SseOpcode::Pmuludq
             | SseOpcode::Por
@@ -686,6 +694,8 @@ impl fmt::Debug for SseOpcode {
             SseOpcode::Movdqa => "movdqa",
             SseOpcode::Movdqu => "movdqu",
             SseOpcode::Movlhps => "movlhps",
+            SseOpcode::Movmskps => "movmskps",
+            SseOpcode::Movmskpd => "movmskpd",
             SseOpcode::Movq => "movq",
             SseOpcode::Movss => "movss",
             SseOpcode::Movsd => "movsd",
@@ -700,6 +710,7 @@ impl fmt::Debug for SseOpcode {
             SseOpcode::Pabsb => "pabsb",
             SseOpcode::Pabsw => "pabsw",
             SseOpcode::Pabsd => "pabsd",
+            SseOpcode::Packsswb => "packsswb",
             SseOpcode::Paddb => "paddb",
             SseOpcode::Paddd => "paddd",
             SseOpcode::Paddq => "paddq",
@@ -738,6 +749,7 @@ impl fmt::Debug for SseOpcode {
             SseOpcode::Pminub => "pminub",
             SseOpcode::Pminuw => "pminuw",
             SseOpcode::Pminud => "pminud",
+            SseOpcode::Pmovmskb => "pmovmskb",
             SseOpcode::Pmulld => "pmulld",
             SseOpcode::Pmullw => "pmullw",
             SseOpcode::Pmuludq => "pmuludq",
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index 2ad4c4d723..eeb1d3dacb 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1762,6 +1762,7 @@ pub(crate) fn emit(
                 SseOpcode::Mulsd => (LegacyPrefixes::_F2, 0x0F59, 2),
                 SseOpcode::Orpd => (LegacyPrefixes::_66, 0x0F56, 2),
                 SseOpcode::Orps => (LegacyPrefixes::None, 0x0F56, 2),
+                SseOpcode::Packsswb => (LegacyPrefixes::_66, 0x0F63, 2),
                 SseOpcode::Paddb => (LegacyPrefixes::_66, 0x0FFC, 2),
                 SseOpcode::Paddd => (LegacyPrefixes::_66, 0x0FFE, 2),
                 SseOpcode::Paddq => (LegacyPrefixes::_66, 0x0FD4, 2),
@@ -2040,11 +2041,14 @@ pub(crate) fn emit(
             dst_size,
         } => {
             let (prefix, opcode, dst_first) = match op {
+                SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true),
+                SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true),
                 // Movd and movq use the same opcode; the presence of the REX prefix (set below)
                 // actually determines which is used.
                 SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F7E, false),
-                SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true),
-                SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true),
+                SseOpcode::Movmskps => (LegacyPrefixes::None, 0x0F50, true),
+                SseOpcode::Movmskpd => (LegacyPrefixes::_66, 0x0F50, true),
+                SseOpcode::Pmovmskb => (LegacyPrefixes::_66, 0x0FD7, true),
                 _ => panic!("unexpected opcode {:?}", op),
             };
             let rex = match dst_size {
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
index 728bc09c97..0e6ad1d118 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -3292,6 +3292,12 @@ fn test_x64_emit() {
         "pshufb  %xmm11, %xmm2",
     ));
 
+    insns.push((
+        Inst::xmm_rm_r(SseOpcode::Packsswb, RegMem::reg(xmm11), w_xmm2, None),
+        "66410F63D3",
+        "packsswb %xmm11, %xmm2",
+    ));
+
     // ========================================================
     // XMM_RM_R: Integer Conversion
     insns.push((
@@ -3422,6 +3428,22 @@ fn test_x64_emit() {
         "cvttsd2si %xmm0, %r15",
     ));
 
+    insns.push((
+        Inst::xmm_to_gpr(SseOpcode::Pmovmskb, xmm10, w_rax, OperandSize::Size32),
+        "66410FD7C2",
+        "pmovmskb %xmm10, %eax",
+    ));
+    insns.push((
+        Inst::xmm_to_gpr(SseOpcode::Movmskps, xmm2, w_rax, OperandSize::Size32),
+        "0F50C2",
+        "movmskps %xmm2, %eax",
+    ));
+    insns.push((
+        Inst::xmm_to_gpr(SseOpcode::Movmskpd, xmm0, w_rcx, OperandSize::Size32),
+        "660F50C8",
+        "movmskpd %xmm0, %ecx",
+    ));
+
     insns.push((
         Inst::gpr_to_xmm(
             SseOpcode::Movd,
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 8e887b0e90..f63a157d8c 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -3657,6 +3657,58 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::setcc(CC::Z, dst));
         }
 
+        Opcode::VhighBits => {
+            let src = put_input_in_reg(ctx, inputs[0]);
+            let src_ty = ctx.input_ty(insn, 0);
+            debug_assert!(src_ty.is_vector() && src_ty.bits() == 128);
+            let dst = get_output_reg(ctx, outputs[0]);
+            debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+
+            // The Intel specification allows using both 32-bit and 64-bit GPRs as destination for
+            // the "move mask" instructions: "In 64-bit mode, the instruction can access additional
+            // registers when used with a REX.R prefix. The default operand size is 64-bit in
+            // 64-bit mode" (PMOVMSKB in IA Software Development Manual, vol. 2). REX.R only extends
+            // the register encoding, and a 32-bit write zeroes the upper half of the GPR, so REX.W
+            // is never needed; we always clear it (`OperandSize` is what sets/clears REX.W).
+            let size = OperandSize::Size32;
+
+            match src_ty {
+                types::I8X16 | types::B8X16 => {
+                    ctx.emit(Inst::xmm_to_gpr(SseOpcode::Pmovmskb, src, dst, size))
+                }
+                types::I32X4 | types::B32X4 | types::F32X4 => {
+                    ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movmskps, src, dst, size))
+                }
+                types::I64X2 | types::B64X2 | types::F64X2 => {
+                    ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movmskpd, src, dst, size))
+                }
+                types::I16X8 | types::B16X8 => {
+                    // There is no x86 instruction for extracting the high bit of 16-bit lanes so
+                    // here we:
+                    // - duplicate the 16-bit lanes of `src` into 8-bit lanes:
+                    //     PACKSSWB([x1, x2, ...], [x1, x2, ...]) = [x1', x2', ..., x1', x2', ...]
+                    // - use PMOVMSKB to gather the high bits; now we have duplicates, though
+                    // - shift away the bottom 8 high bits to remove the duplicates.
+                    let tmp = ctx.alloc_tmp(RegClass::V128, src_ty);
+                    ctx.emit(Inst::gen_move(tmp, src, src_ty));
+                    ctx.emit(Inst::xmm_rm_r(
+                        SseOpcode::Packsswb,
+                        RegMem::reg(src),
+                        tmp,
+                        None,
+                    ));
+                    ctx.emit(Inst::xmm_to_gpr(
+                        SseOpcode::Pmovmskb,
+                        tmp.to_reg(),
+                        dst,
+                        size,
+                    ));
+                    ctx.emit(Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(8), dst));
+                }
+                _ => unimplemented!("unknown input type {} for {}", src_ty, op),
+            }
+        }
+
         Opcode::IaddImm
         | Opcode::ImulImm
         | Opcode::UdivImm