diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index a49b998914..6aa4f5509f 100755
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -1409,8 +1409,8 @@ pub(crate) fn define(
             r#"
         Conditional select.
 
-        This instruction selects whole values. Use `vselect` for
-        lane-wise selection.
+        This instruction selects whole values. Use `bitselect` to choose each
+        bit according to a mask.
         "#,
             &formats.ternary,
         )
@@ -1458,7 +1458,7 @@ pub(crate) fn define(
 
         For each bit in `c`, this instruction selects the corresponding bit from `x` if the bit
-        in `x` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
-        `select`, `vselect`.
+        in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
+        `select`.
         "#,
             &formats.ternary,
         )
@@ -1484,26 +1484,7 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
-    let c = &Operand::new("c", &TxN.as_bool()).with_doc("Controlling vector");
-    let x = &Operand::new("x", TxN).with_doc("Value to use where `c` is true");
-    let y = &Operand::new("y", TxN).with_doc("Value to use where `c` is false");
     let a = &Operand::new("a", TxN);
-
-    ig.push(
-        Inst::new(
-            "vselect",
-            r#"
-        Vector lane select.
-
-        Select lanes from ``x`` or ``y`` controlled by the lanes of the truthy
-        vector ``c``.
-        "#,
-            &formats.ternary,
-        )
-        .operands_in(vec![c, x, y])
-        .operands_out(vec![a]),
-    );
-
     let s = &Operand::new("s", i8);
 
     ig.push(
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index bd7e968d72..edb1124473 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1659,11 +1659,6 @@
 (rule 1 (lower (has_type (ty_vec128 ty) (bitselect c x y)))
         (bsl ty c x y))
 
-;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type (ty_vec128 ty) (vselect c x y)))
-        (bsl ty c x y))
-
 ;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; T -> I{64,32,16,8}: We can simply pass through the value: values
diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle
index acbbf2082c..02563f4d5b 100644
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -1170,13 +1170,6 @@
       (vec_select ty y z x))
 
 
-;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Vector select.
-(rule (lower (has_type (ty_vec128 ty) (vselect x y z)))
-      (vec_select ty y z x))
-
-
 ;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type ty (bmask x)))
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index f6abbec48d..ac27aae4d6 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -1193,7 +1193,7 @@
         (sse_or ty b a)))
 
 ;; If every byte of the condition is guaranteed to be all ones or all zeroes,
-;; we can use x86_blend like vselect does.
+;; we can use x64_blend.
 (rule 1 (lower (has_type ty @ (multi_lane _bits _lanes)
                          (bitselect condition
                                     if_true
@@ -1226,15 +1226,6 @@
                        (x86_blendv condition if_true if_false)))
       (x64_blendvpd if_false if_true condition))
 
-;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type ty @ (multi_lane _bits _lanes)
-                       (vselect condition if_true if_false)))
-      (x64_blend ty
-                 condition
-                 if_true
-                 if_false))
-
 ;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx)))
diff --git a/cranelift/codegen/src/nan_canonicalization.rs b/cranelift/codegen/src/nan_canonicalization.rs
index 40600fc6fb..49415a86b0 100644
--- a/cranelift/codegen/src/nan_canonicalization.rs
+++ b/cranelift/codegen/src/nan_canonicalization.rs
@@ -7,6 +7,7 @@ use crate::ir::condcodes::FloatCC;
 use crate::ir::immediates::{Ieee32, Ieee64};
 use crate::ir::types;
 use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value};
+use crate::opts::MemFlags;
 use crate::timing;
 
 // Canonical 32-bit and 64-bit NaN values.
@@ -70,9 +71,10 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
             .select(is_nan, canon_nan, new_res);
     };
     let vector_select = |pos: &mut FuncCursor, canon_nan: Value| {
+        let is_nan = pos.ins().bitcast(val_type, MemFlags::new(), is_nan);
         pos.ins()
             .with_result(val)
-            .vselect(is_nan, canon_nan, new_res);
+            .bitselect(is_nan, canon_nan, new_res);
     };
 
     match val_type {
diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle
index eac1650654..d7663df031 100644
--- a/cranelift/codegen/src/opts/algebraic.isle
+++ b/cranelift/codegen/src/opts/algebraic.isle
@@ -454,56 +454,56 @@
        (select ty (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) y x))
       (umin ty x y))
 
-;; Transform vselect-of-icmp into {u,s}{min,max} instructions where possible.
+;; Transform vector bitselect-of-icmp into {u,s}{min,max}; scalar icmp yields 0/1, not a bit mask.
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.SignedGreaterThan) x y) x y))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.SignedGreaterThan) x y) x y))
       (smax ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.SignedGreaterThanOrEqual) x y) x y))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.SignedGreaterThanOrEqual) x y) x y))
       (smax ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.UnsignedGreaterThan) x y) x y))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.UnsignedGreaterThan) x y) x y))
       (umax ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) x y))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) x y))
       (umax ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.SignedLessThan) x y) x y))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.SignedLessThan) x y) x y))
       (smin ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.SignedLessThanOrEqual) x y) x y))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.SignedLessThanOrEqual) x y) x y))
       (smin ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.UnsignedLessThan) x y) x y))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.UnsignedLessThan) x y) x y))
       (umin ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) x y))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) x y))
       (umin ty x y))
 
 ;; These are the same rules as above, but when the operands for select are swapped
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.SignedLessThan) x y) y x))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.SignedLessThan) x y) y x))
       (smax ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.SignedLessThanOrEqual) x y) y x))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.SignedLessThanOrEqual) x y) y x))
       (smax ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.UnsignedLessThan) x y) y x))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.UnsignedLessThan) x y) y x))
       (umax ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) y x))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) y x))
       (umax ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.SignedGreaterThan) x y) y x))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.SignedGreaterThan) x y) y x))
       (smin ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.SignedGreaterThanOrEqual) x y) y x))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.SignedGreaterThanOrEqual) x y) y x))
       (smin ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.UnsignedGreaterThan) x y) y x))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.UnsignedGreaterThan) x y) y x))
       (umin ty x y))
 (rule (simplify
-       (vselect ty (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) y x))
+       (bitselect ty @ (multi_lane _ _) (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) y x))
       (umin ty x y))
 
 ;; For floats convert fcmp lt into pseudo_min and gt into pseudo_max
@@ -520,13 +520,9 @@
        (select ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
       (fmax_pseudo ty x y))
 
-;; Do the same for vectors
-(rule (simplify
-       (vselect ty (fcmp _ (FloatCC.LessThan) x y) x y))
-      (fmin_pseudo ty x y))
-(rule (simplify
-       (vselect ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
-      (fmax_pseudo ty x y))
+;; TODO: perform this same optimization to `f{min,max}_pseudo` for vectors
+;; with the `bitselect` instruction, but the pattern is a bit more complicated
+;; due to most bitselects-over-floats having bitcasts.
 
 ;; If both of the multiplied arguments to an `fma` are negated then remove
 ;; both of them since they cancel out.
diff --git a/cranelift/filetests/filetests/egraph/vselect.clif b/cranelift/filetests/filetests/egraph/bitselect.clif
similarity index 54%
rename from cranelift/filetests/filetests/egraph/vselect.clif
rename to cranelift/filetests/filetests/egraph/bitselect.clif
index 805f7b61cc..91797bb397 100644
--- a/cranelift/filetests/filetests/egraph/vselect.clif
+++ b/cranelift/filetests/filetests/egraph/bitselect.clif
@@ -5,10 +5,10 @@ target x86_64
 target aarch64
 target s390x
 
-function %vselect_sgt_to_smax(i32x4, i32x4) -> i32x4 {
+function %bitselect_sgt_to_smax(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp sgt v0, v1
-    v3 = vselect v2, v0, v1
+    v3 = bitselect v2, v0, v1
     return v3
 }
 
@@ -17,11 +17,11 @@ block0(v0: i32x4, v1: i32x4):
 ; check:    return v4
 
 
-; This tests an inverted vselect, where the operands are swapped.
-function %vselect_sgt_to_smax(i32x4, i32x4) -> i32x4 {
+; This tests an inverted bitselect, where the operands are swapped.
+function %bitselect_sgt_to_smax(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp sgt v0, v1
-    v3 = vselect v2, v1, v0
+    v3 = bitselect v2, v1, v0
     return v3
 }
 
@@ -31,10 +31,10 @@ block0(v0: i32x4, v1: i32x4):
 
 
 
-function %vselect_sge_to_smax(i32x4, i32x4) -> i32x4 {
+function %bitselect_sge_to_smax(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp sge v0, v1
-    v3 = vselect v2, v0, v1
+    v3 = bitselect v2, v0, v1
     return v3
 }
 
@@ -43,10 +43,10 @@ block0(v0: i32x4, v1: i32x4):
 ; check:    return v4
 
 
-function %vselect_ugt_to_umax(i32x4, i32x4) -> i32x4 {
+function %bitselect_ugt_to_umax(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp ugt v0, v1
-    v3 = vselect v2, v0, v1
+    v3 = bitselect v2, v0, v1
     return v3
 }
 
@@ -55,10 +55,10 @@ block0(v0: i32x4, v1: i32x4):
 ; check:    return v4
 
 
-function %vselect_uge_to_umax(i32x4, i32x4) -> i32x4 {
+function %bitselect_uge_to_umax(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp uge v0, v1
-    v3 = vselect v2, v0, v1
+    v3 = bitselect v2, v0, v1
     return v3
 }
 
@@ -68,10 +68,10 @@ block0(v0: i32x4, v1: i32x4):
 
 
 
-function %vselect_slt_to_smin(i32x4, i32x4) -> i32x4 {
+function %bitselect_slt_to_smin(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp slt v0, v1
-    v3 = vselect v2, v0, v1
+    v3 = bitselect v2, v0, v1
     return v3
 }
 
@@ -80,10 +80,10 @@ block0(v0: i32x4, v1: i32x4):
 ; check:    return v4
 
 
-function %vselect_sle_to_smin(i32x4, i32x4) -> i32x4 {
+function %bitselect_sle_to_smin(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp sle v0, v1
-    v3 = vselect v2, v0, v1
+    v3 = bitselect v2, v0, v1
     return v3
 }
 
@@ -92,10 +92,10 @@ block0(v0: i32x4, v1: i32x4):
 ; check:    return v4
 
 
-function %vselect_ult_to_umin(i32x4, i32x4) -> i32x4 {
+function %bitselect_ult_to_umin(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp ult v0, v1
-    v3 = vselect v2, v0, v1
+    v3 = bitselect v2, v0, v1
     return v3
 }
 
@@ -104,10 +104,10 @@ block0(v0: i32x4, v1: i32x4):
 ; check:    return v4
 
 
-function %vselect_ule_to_umin(i32x4, i32x4) -> i32x4 {
+function %bitselect_ule_to_umin(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
     v2 = icmp ule v0, v1
-    v3 = vselect v2, v0, v1
+    v3 = bitselect v2, v0, v1
     return v3
 }
 
@@ -117,38 +117,14 @@ block0(v0: i32x4, v1: i32x4):
 
 
 
-function %vselect_with_different_regs_does_not_optimize(i32x4, i32x4, i32x4, i32x4) -> i32x4 {
+function %bitselect_with_different_regs_does_not_optimize(i32x4, i32x4, i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4, v2: i32x4, v3: i32x4):
     v4 = icmp ule v0, v1
-    v5 = vselect v4, v2, v3
+    v5 = bitselect v4, v2, v3
     return v5
 }
 
 ; check: block0(v0: i32x4, v1: i32x4, v2: i32x4, v3: i32x4):
 ; check:    v4 = icmp ule v0, v1
-; check:    v5 = vselect v4, v2, v3
+; check:    v5 = bitselect v4, v2, v3
 ; check:    return v5
-
-
-
-function %vselect_fcmp_gt_to_fmax_pseudo(f32x4, f32x4) -> f32x4 {
-block0(v0: f32x4, v1: f32x4):
-    v2 = fcmp gt v0, v1
-    v3 = vselect v2, v0, v1
-    return v3
-}
-
-; check: block0(v0: f32x4, v1: f32x4):
-; check:    v4 = fmax_pseudo v0, v1
-; check:    return v4
-
-function %vselect_fcmp_lt_to_fmin_pseudo(f32x4, f32x4) -> f32x4 {
-block0(v0: f32x4, v1: f32x4):
-    v2 = fcmp lt v0, v1
-    v3 = vselect v2, v0, v1
-    return v3
-}
-
-; check: block0(v0: f32x4, v1: f32x4):
-; check:    v4 = fmin_pseudo v0, v1
-; check:    return v4
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
index 0ae0eb407c..b4449a9670 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
@@ -173,7 +173,7 @@ block0:
 
 function %vselect_i16x8(i16x8, i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8, v2: i16x8):
-    v3 = vselect v0, v1, v2
+    v3 = bitselect v0, v1, v2
     return v3
 }
 
@@ -187,9 +187,9 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
 ;   bsl v0.16b, v1.16b, v2.16b
 ;   ret
 
-function %vselect_f32x4(i32x4, f32x4, f32x4) -> f32x4 {
-block0(v0: i32x4, v1: f32x4, v2: f32x4):
-    v3 = vselect v0, v1, v2
+function %vselect_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4, v2: f32x4):
+    v3 = bitselect v0, v1, v2
     return v3
 }
 
@@ -203,9 +203,9 @@ block0(v0: i32x4, v1: f32x4, v2: f32x4):
 ;   bsl v0.16b, v1.16b, v2.16b
 ;   ret
 
-function %vselect_f64x2(i64x2, f64x2, f64x2) -> f64x2 {
-block0(v0: i64x2, v1: f64x2, v2: f64x2):
-    v3 = vselect v0, v1, v2
+function %vselect_f64x2(f64x2, f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2, v2: f64x2):
+    v3 = bitselect v0, v1, v2
     return v3
 }
 
diff --git a/cranelift/filetests/filetests/isa/s390x/vec-bitwise.clif b/cranelift/filetests/filetests/isa/s390x/vec-bitwise.clif
index 1fdbb2e64f..0a76e10711 100644
--- a/cranelift/filetests/filetests/isa/s390x/vec-bitwise.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-bitwise.clif
@@ -514,67 +514,3 @@ block0(v0: i8x16, v1: i8x16, v2: i8x16):
 ;   vsel %v24, %v25, %v26, %v24
 ;   br %r14
 
-function %vselect_i64x2(i64x2, i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2, v1: i64x2, v2: i64x2):
-  v3 = vselect.i64x2 v0, v1, v2
-  return v3
-}
-
-; VCode:
-; block0:
-;   vsel %v24, %v25, %v26, %v24
-;   br %r14
-; 
-; Disassembled:
-; block0: ; offset 0x0
-;   vsel %v24, %v25, %v26, %v24
-;   br %r14
-
-function %vselect_i32x4(i32x4, i32x4, i32x4) -> i32x4 {
-block0(v0: i32x4, v1: i32x4, v2: i32x4):
-  v3 = vselect.i32x4 v0, v1, v2
-  return v3
-}
-
-; VCode:
-; block0:
-;   vsel %v24, %v25, %v26, %v24
-;   br %r14
-; 
-; Disassembled:
-; block0: ; offset 0x0
-;   vsel %v24, %v25, %v26, %v24
-;   br %r14
-
-function %vselect_i16x8(i16x8, i16x8, i16x8) -> i16x8 {
-block0(v0: i16x8, v1: i16x8, v2: i16x8):
-  v3 = vselect.i16x8 v0, v1, v2
-  return v3
-}
-
-; VCode:
-; block0:
-;   vsel %v24, %v25, %v26, %v24
-;   br %r14
-; 
-; Disassembled:
-; block0: ; offset 0x0
-;   vsel %v24, %v25, %v26, %v24
-;   br %r14
-
-function %vselect_i8x16(i8x16, i8x16, i8x16) -> i8x16 {
-block0(v0: i8x16, v1: i8x16, v2: i8x16):
-  v3 = vselect.i8x16 v0, v1, v2
-  return v3
-}
-
-; VCode:
-; block0:
-;   vsel %v24, %v25, %v26, %v24
-;   br %r14
-; 
-; Disassembled:
-; block0: ; offset 0x0
-;   vsel %v24, %v25, %v26, %v24
-;   br %r14
-
diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif
index 479844fe63..4cf4352956 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif
@@ -433,7 +433,7 @@ block0(v0: f64x2, v1: f64x2):
 
 function %i16x8_bitselect(i16x8, i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8, v2: i16x8):
-  v3 = vselect v0, v1, v2
+  v3 = bitselect v0, v1, v2
   return v3
 }
 
@@ -441,7 +441,9 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   vpblendvb %xmm0, %xmm1, %xmm0, %xmm2
+;   vpand   %xmm1, %xmm0, %xmm4
+;   vpandn  %xmm0, %xmm2, %xmm6
+;   vpor    %xmm6, %xmm4, %xmm0
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -451,14 +453,16 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+;   vpand %xmm0, %xmm1, %xmm4
+;   vpandn %xmm2, %xmm0, %xmm6
+;   vpor %xmm4, %xmm6, %xmm0
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
 
-function %i32x4_bitselect(i32x4, f32x4, f32x4) -> f32x4 {
-block0(v0: i32x4, v1: f32x4, v2: f32x4):
-  v3 = vselect v0, v1, v2
+function %f32x4_bitselect(f32x4, f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4, v2: f32x4):
+  v3 = bitselect v0, v1, v2
   return v3
 }
 
@@ -466,7 +470,9 @@ block0(v0: i32x4, v1: f32x4, v2: f32x4):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   vblendvps %xmm0, %xmm1, %xmm0, %xmm2
+;   vandps  %xmm1, %xmm0, %xmm4
+;   vandnps %xmm0, %xmm2, %xmm6
+;   vorps   %xmm6, %xmm4, %xmm0
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -476,14 +482,16 @@ block0(v0: i32x4, v1: f32x4, v2: f32x4):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+;   vandps %xmm0, %xmm1, %xmm4
+;   vandnps %xmm2, %xmm0, %xmm6
+;   vorps %xmm4, %xmm6, %xmm0
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
 
-function %i64x2_bitselect(i64x2, f64x2, f64x2) -> f64x2 {
-block0(v0: i64x2, v1: f64x2, v2: f64x2):
-  v3 = vselect v0, v1, v2
+function %f64x2_bitselect(f64x2, f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2, v2: f64x2):
+  v3 = bitselect v0, v1, v2
   return v3
 }
 
@@ -491,7 +499,9 @@ block0(v0: i64x2, v1: f64x2, v2: f64x2):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   vblendvpd %xmm0, %xmm1, %xmm0, %xmm2
+;   vandpd  %xmm1, %xmm0, %xmm4
+;   vandnpd %xmm0, %xmm2, %xmm6
+;   vorpd   %xmm6, %xmm4, %xmm0
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -501,7 +511,9 @@ block0(v0: i64x2, v1: f64x2, v2: f64x2):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+;   vandpd %xmm0, %xmm1, %xmm4
+;   vandnpd %xmm2, %xmm0, %xmm6
+;   vorpd %xmm4, %xmm6, %xmm0
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
index f63cc22313..a0e4a9c279 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
@@ -229,7 +229,7 @@ block0(v0: i32x4, v1: i32x4):
 
 function %vselect_i16x8(i16x8, i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8, v2: i16x8):
-    v3 = vselect v0, v1, v2
+    v3 = bitselect v0, v1, v2
     return v3
 }
 
@@ -237,9 +237,10 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   movdqa  %xmm2, %xmm4
-;   pblendvb %xmm4, %xmm1, %xmm4
-;   movdqa  %xmm4, %xmm0
+;   movdqa  %xmm1, %xmm4
+;   pand    %xmm4, %xmm0, %xmm4
+;   pandn   %xmm0, %xmm2, %xmm0
+;   por     %xmm0, %xmm4, %xmm0
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -249,16 +250,17 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   movdqa %xmm2, %xmm4
-;   pblendvb %xmm0, %xmm1, %xmm4
-;   movdqa %xmm4, %xmm0
+;   movdqa %xmm1, %xmm4
+;   pand %xmm0, %xmm4
+;   pandn %xmm2, %xmm0
+;   por %xmm4, %xmm0
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
 
-function %vselect_f32x4(i32x4, f32x4, f32x4) -> f32x4 {
-block0(v0: i32x4, v1: f32x4, v2: f32x4):
-    v3 = vselect v0, v1, v2
+function %vselect_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4, v2: f32x4):
+    v3 = bitselect v0, v1, v2
     return v3
 }
 
@@ -266,9 +268,10 @@ block0(v0: i32x4, v1: f32x4, v2: f32x4):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   movdqa  %xmm2, %xmm4
-;   blendvps %xmm4, %xmm1, %xmm4
-;   movdqa  %xmm4, %xmm0
+;   movdqa  %xmm1, %xmm4
+;   andps   %xmm4, %xmm0, %xmm4
+;   andnps  %xmm0, %xmm2, %xmm0
+;   orps    %xmm0, %xmm4, %xmm0
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -278,16 +281,17 @@ block0(v0: i32x4, v1: f32x4, v2: f32x4):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   movdqa %xmm2, %xmm4
-;   blendvps %xmm0, %xmm1, %xmm4
-;   movdqa %xmm4, %xmm0
+;   movdqa %xmm1, %xmm4
+;   andps %xmm0, %xmm4
+;   andnps %xmm2, %xmm0
+;   orps %xmm4, %xmm0
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
 
-function %vselect_f64x2(i64x2, f64x2, f64x2) -> f64x2 {
-block0(v0: i64x2, v1: f64x2, v2: f64x2):
-    v3 = vselect v0, v1, v2
+function %vselect_f64x2(f64x2, f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2, v2: f64x2):
+    v3 = bitselect v0, v1, v2
     return v3
 }
 
@@ -295,9 +299,10 @@ block0(v0: i64x2, v1: f64x2, v2: f64x2):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   movdqa  %xmm2, %xmm4
-;   blendvpd %xmm4, %xmm1, %xmm4
-;   movdqa  %xmm4, %xmm0
+;   movdqa  %xmm1, %xmm4
+;   andpd   %xmm4, %xmm0, %xmm4
+;   andnpd  %xmm0, %xmm2, %xmm0
+;   orpd    %xmm0, %xmm4, %xmm0
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
@@ -307,9 +312,10 @@ block0(v0: i64x2, v1: f64x2, v2: f64x2):
 ;   pushq %rbp
 ;   movq %rsp, %rbp
 ; block1: ; offset 0x4
-;   movdqa %xmm2, %xmm4
-;   blendvpd %xmm0, %xmm1, %xmm4
-;   movdqa %xmm4, %xmm0
+;   movdqa %xmm1, %xmm4
+;   andpd %xmm0, %xmm4
+;   andnpd %xmm2, %xmm0
+;   orpd %xmm4, %xmm0
 ;   movq %rbp, %rsp
 ;   popq %rbp
 ;   retq
diff --git a/cranelift/filetests/filetests/runtests/simd-vselect.clif b/cranelift/filetests/filetests/runtests/simd-vselect.clif
deleted file mode 100644
index 5d2ca1afe7..0000000000
--- a/cranelift/filetests/filetests/runtests/simd-vselect.clif
+++ /dev/null
@@ -1,82 +0,0 @@
-test interpret
-test run
-target s390x
-target aarch64
-set enable_simd
-target x86_64 has_sse3 has_ssse3 has_sse41
-
-function %vselect_i8x16() -> i8x16 {
-block0:
-    v1 = vconst.i8x16 [0 -1 0 -1 0 -1 -1 -1 -1 -1 0 0 0 0 0 0]
-    v2 = vconst.i8x16 [100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115]
-    v3 = vconst.i8x16 [200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215]
-    v4 = vselect v1, v2, v3
-    return v4
-}
-; run: %vselect_i8x16() == [200 101 202 103 204 105 106 107 108 109 210 211 212 213 214 215]
-
-function %vselect_i16x8() -> i16x8 {
-block0:
-    v1 = vconst.i16x8 [0 -1 0 -1 0 -1 -1 -1]
-    v2 = vconst.i16x8 [100 101 102 103 104 105 106 107]
-    v3 = vconst.i16x8 [200 201 202 203 204 205 206 207]
-    v4 = vselect v1, v2, v3
-    return v4
-}
-; run: %vselect_i16x8() == [200 101 202 103 204 105 106 107]
-
-function %vselect_i32x4_const() -> i32x4 {
-block0:
-    v1 = vconst.i32x4 [0 -1 0 -1]
-    v2 = vconst.i32x4 [100 101 102 103]
-    v3 = vconst.i32x4 [200 201 202 203]
-    v4 = vselect v1, v2, v3
-    return v4
-}
-; run: %vselect_i32x4_const() == [200 101 202 103]
-
-function %vselect_i32x4(i32x4, i32x4, i32x4) -> i32x4 {
-block0(v0: i32x4, v1: i32x4, v2: i32x4):
-    v3 = vselect v0, v1, v2
-    return v3
-}
-; Remember that vselect accepts: 1) the selector vector, 2) the "if true" vector, and 3) the "if false" vector.
-; run: %vselect_i32x4([-1 -1 0 0], [1 2 -1 -1], [-1 -1 3 4]) == [1 2 3 4]
-
-function %vselect_i64x2() -> i64x2 {
-block0:
-    v1 = vconst.i64x2 [0 -1]
-    v2 = vconst.i64x2 [100 101]
-    v3 = vconst.i64x2 [200 201]
-    v4 = vselect v1, v2, v3
-    return v4
-}
-; run: %vselect_i64x2() == [200 101]
-
-function %vselect_p_i8x16(i8x16, i8x16, i8x16) -> i8x16 {
-block0(v0: i8x16, v1: i8x16, v2: i8x16):
-    v3 = vselect v0, v1, v2
-    return v3
-}
-; run: %vselect_p_i8x16([-1 0 -1 -1 -1 0 0 0 -1 0 -1 -1 -1 0 0 0], [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [1 18 3 4 5 22 23 24 9 26 11 12 13 30 31 32]
-
-function %vselect_p_i16x8(i16x8, i16x8, i16x8) -> i16x8 {
-block0(v0: i16x8, v1: i16x8, v2: i16x8):
-    v3 = vselect v0, v1, v2
-    return v3
-}
-; run: %vselect_p_i16x8([-1 0 -1 -1 -1 0 0 0], [1 2 3 4 5 6 7 8], [17 18 19 20 21 22 23 24]) == [1 18 3 4 5 22 23 24]
-
-function %vselect_p_i32x4(i32x4, i32x4, i32x4) -> i32x4 {
-block0(v0: i32x4, v1: i32x4, v2: i32x4):
-    v3 = vselect v0, v1, v2
-    return v3
-}
-; run: %vselect_p_i32x4([-1 0 -1 -1], [1 2 3 4], [100000 200000 300000 400000]) == [1 200000 3 4]
-
-function %vselect_p_i64x2(i64x2, i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2, v1: i64x2, v2: i64x2):
-    v3 = vselect v0, v1, v2
-    return v3
-}
-; run: %vselect_p_i64x2([-1 0], [1 2], [100000000000 200000000000]) == [1 200000000000]
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 56a894fd62..51251b6d47 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -603,11 +603,7 @@ where
         Opcode::Select | Opcode::SelectSpectreGuard => {
             choose(arg(0)?.into_bool()?, arg(1)?, arg(2)?)
         }
-        Opcode::Bitselect => {
-            let mask_a = Value::and(arg(0)?, arg(1)?)?;
-            let mask_b = Value::and(Value::not(arg(0)?)?, arg(2)?)?;
-            assign(Value::or(mask_a, mask_b)?)
-        }
+        Opcode::Bitselect => assign(bitselect(arg(0)?, arg(1)?, arg(2)?)?),
         Opcode::Icmp => assign(icmp(
             ctrl_ty,
             inst.cond_code().unwrap(),
@@ -623,7 +619,7 @@ where
         Opcode::Smin => {
             if ctrl_ty.is_vector() {
                 let icmp = icmp(ctrl_ty, IntCC::SignedGreaterThan, &arg(1)?, &arg(0)?)?;
-                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
+                assign(bitselect(icmp, arg(0)?, arg(1)?)?)
             } else {
                 choose(Value::gt(&arg(1)?, &arg(0)?)?, arg(0)?, arg(1)?)
             }
@@ -631,7 +627,7 @@ where
         Opcode::Umin => {
             if ctrl_ty.is_vector() {
                 let icmp = icmp(ctrl_ty, IntCC::UnsignedGreaterThan, &arg(1)?, &arg(0)?)?;
-                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
+                assign(bitselect(icmp, arg(0)?, arg(1)?)?)
             } else {
                 choose(
                     Value::gt(
@@ -646,7 +642,7 @@ where
         Opcode::Smax => {
             if ctrl_ty.is_vector() {
                 let icmp = icmp(ctrl_ty, IntCC::SignedGreaterThan, &arg(0)?, &arg(1)?)?;
-                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
+                assign(bitselect(icmp, arg(0)?, arg(1)?)?)
             } else {
                 choose(Value::gt(&arg(0)?, &arg(1)?)?, arg(0)?, arg(1)?)
             }
@@ -654,7 +650,7 @@ where
         Opcode::Umax => {
             if ctrl_ty.is_vector() {
                 let icmp = icmp(ctrl_ty, IntCC::UnsignedGreaterThan, &arg(0)?, &arg(1)?)?;
-                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
+                assign(bitselect(icmp, arg(0)?, arg(1)?)?)
             } else {
                 choose(
                     Value::gt(
@@ -1067,7 +1063,6 @@ where
             }
             assign(Value::int(result, ctrl_ty)?)
         }
-        Opcode::Vselect => assign(vselect(&arg(0)?, &arg(1)?, &arg(2)?, ctrl_ty)?),
         Opcode::VanyTrue => {
             let lane_ty = ctrl_ty.lane_type();
             let init = V::bool(false, true, lane_ty)?;
@@ -1641,20 +1636,11 @@ where
     vectorizelanes(&result, vector_type)
 }
 
-fn vselect<V>(c: &V, x: &V, y: &V, vector_type: types::Type) -> ValueResult<V>
+fn bitselect<V>(c: V, x: V, y: V) -> ValueResult<V>
 where
     V: Value,
 {
-    let c = extractlanes(c, vector_type)?;
-    let x = extractlanes(x, vector_type)?;
-    let y = extractlanes(y, vector_type)?;
-    let mut new_vec = SimdVec::new();
-    for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) {
-        if Value::eq(&c, &Value::int(0, vector_type.lane_type())?)? {
-            new_vec.push(y);
-        } else {
-            new_vec.push(x);
-        }
-    }
-    vectorizelanes(&new_vec, vector_type)
+    let mask_x = Value::and(c.clone(), x)?;
+    let mask_y = Value::and(Value::not(c)?, y)?;
+    Value::or(mask_x, mask_y)
 }
diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs
index 2262d6a06f..704a7002ab 100644
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -218,6 +218,28 @@ macro_rules! binary_match {
     };
 }
 
+macro_rules! bitop {
+    ( $op:tt($arg1:expr, $arg2:expr) ) => {
+        Ok(match ($arg1, $arg2) {
+            (DataValue::I8(a), DataValue::I8(b)) => DataValue::I8(a $op b),
+            (DataValue::I16(a), DataValue::I16(b)) => DataValue::I16(a $op b),
+            (DataValue::I32(a), DataValue::I32(b)) => DataValue::I32(a $op b),
+            (DataValue::I64(a), DataValue::I64(b)) => DataValue::I64(a $op b),
+            (DataValue::I128(a), DataValue::I128(b)) => DataValue::I128(a $op b),
+            (DataValue::F32(a), DataValue::F32(b)) => DataValue::F32(a $op b),
+            (DataValue::F64(a), DataValue::F64(b)) => DataValue::F64(a $op b),
+            (DataValue::V128(a), DataValue::V128(b)) => {
+                let mut a2 = a.clone();
+                for (a, b) in a2.iter_mut().zip(b.iter()) {
+                    *a = *a $op *b;
+                }
+                DataValue::V128(a2)
+            }
+            _ => unimplemented!(),
+        })
+    };
+}
+
 impl Value for DataValue {
     fn ty(&self) -> Type {
         self.ty()
@@ -686,19 +708,35 @@ impl Value for DataValue {
     }
 
     fn and(self, other: Self) -> ValueResult<Self> {
-        binary_match!(&(self, other); [I8, I16, I32, I64, I128, F32, F64])
+        bitop!(&(self, other))
     }
 
     fn or(self, other: Self) -> ValueResult<Self> {
-        binary_match!(|(self, other); [I8, I16, I32, I64, I128, F32, F64])
+        bitop!(|(self, other))
     }
 
     fn xor(self, other: Self) -> ValueResult<Self> {
-        binary_match!(^(self, other); [I8, I16, I32, I64, I128, F32, F64])
+        bitop!(^(self, other))
     }
 
     fn not(self) -> ValueResult<Self> {
-        unary_match!(!(self); [I8, I16, I32, I64, I128, F32, F64])
+        Ok(match self {
+            DataValue::I8(a) => DataValue::I8(!a),
+            DataValue::I16(a) => DataValue::I16(!a),
+            DataValue::I32(a) => DataValue::I32(!a),
+            DataValue::I64(a) => DataValue::I64(!a),
+            DataValue::I128(a) => DataValue::I128(!a),
+            DataValue::F32(a) => DataValue::F32(!a),
+            DataValue::F64(a) => DataValue::F64(!a),
+            DataValue::V128(a) => {
+                let mut a2 = a.clone();
+                for a in a2.iter_mut() {
+                    *a = !*a;
+                }
+                DataValue::V128(a2)
+            }
+            _ => unimplemented!(),
+        })
     }
 
     fn count_ones(self) -> ValueResult<Self> {