diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index a029e8170f..f7f8964905 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -521,6 +521,7 @@ pub(crate) fn define<'defs>(
     let x86_pinsr = x86.by_name("x86_pinsr");
     let x86_pshufd = x86.by_name("x86_pshufd");
     let x86_pshufb = x86.by_name("x86_pshufb");
+    let x86_psll = x86.by_name("x86_psll");
     let x86_push = x86.by_name("x86_push");
     let x86_sdivmodx = x86.by_name("x86_sdivmodx");
     let x86_smulx = x86.by_name("x86_smulx");
@@ -1988,6 +1989,26 @@ pub(crate) fn define<'defs>(
         e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
     }
 
+    // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
+    // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an
+    // I128x1 but restrictions on the type builder prevent this; the general idea here is that
+    // the upper bits are all zeroed and do not form parts of any separate lane. See
+    // https://github.com/CraneStation/cranelift/issues/1146.
+    e.enc_both(
+        bitcast.bind(vector(I64, sse_vector_size)).bind(I32),
+        rec_frurm.opcodes(&MOVD_LOAD_XMM),
+    );
+    e.enc64(
+        bitcast.bind(vector(I64, sse_vector_size)).bind(I64),
+        rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
+    );
+
+    // SIMD shift left
+    for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] {
+        let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(x86_psll, rec_fa.opcodes(*opcodes));
+    }
+
     // SIMD icmp using PCMPEQ*
     for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
         let (opcodes, isa_predicate): (&[_], _) = match ty.lane_bits() {
diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs
index b9f2496a85..c8839e78a8 100644
--- a/cranelift/codegen/meta/src/isa/x86/instructions.rs
+++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs
@@ -387,5 +387,40 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let IxN = &TypeVar::new(
+        "IxN",
+        "A SIMD vector type containing integers",
+        TypeSetBuilder::new()
+            .ints(Interval::All)
+            .simd_lanes(Interval::All)
+            .includes_scalars(false)
+            .build(),
+    );
+    let I64x2 = &TypeVar::new(
+        "I64x2",
+        "A SIMD vector type containing one large integer (the upper lane is concatenated with \
+         the lower lane to form the integer)",
+        TypeSetBuilder::new()
+            .ints(64..64)
+            .simd_lanes(2..2)
+            .includes_scalars(false)
+            .build(),
+    );
+    let x = &operand_doc("x", IxN, "Vector value to shift");
+    let y = &operand_doc("y", I64x2, "Number of bits to shift");
+    let a = &operand("a", IxN);
+    ig.push(
+        Inst::new(
+            "x86_psll",
+            r#"
+        Shift Packed Data Left Logical -- This implements the behavior of the shared instruction 
+        ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL*
+        family of instructions.
+        "#,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
     ig.build()
 }
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 04951c3d5b..8b71bfd637 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -3,7 +3,7 @@ use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
 use crate::cdsl::types::{LaneType, ValueType};
 use crate::cdsl::xform::TransformGroupBuilder;
 use crate::shared::types::Float::F64;
-use crate::shared::types::Int::{I32, I64};
+use crate::shared::types::Int::{I16, I32, I64};
 use crate::shared::Definitions as SharedDefinitions;
 
 pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
@@ -20,6 +20,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     // List of instructions.
     let insts = &shared.instructions;
     let band = insts.by_name("band");
+    let bitcast = insts.by_name("bitcast");
     let bor = insts.by_name("bor");
     let bnot = insts.by_name("bnot");
     let bxor = insts.by_name("bxor");
@@ -40,6 +41,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     let imul = insts.by_name("imul");
     let ineg = insts.by_name("ineg");
     let insertlane = insts.by_name("insertlane");
+    let ishl = insts.by_name("ishl");
     let isub = insts.by_name("isub");
     let popcnt = insts.by_name("popcnt");
     let raw_bitcast = insts.by_name("raw_bitcast");
@@ -60,6 +62,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     let x86_bsr = x86_instructions.by_name("x86_bsr");
     let x86_pshufb = x86_instructions.by_name("x86_pshufb");
     let x86_pshufd = x86_instructions.by_name("x86_pshufd");
+    let x86_psll = x86_instructions.by_name("x86_psll");
     let x86_umulx = x86_instructions.by_name("x86_umulx");
     let x86_smulx = x86_instructions.by_name("x86_smulx");
 
@@ -394,6 +397,16 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
         );
     }
 
+    // SIMD shift left
+    for ty in &[I16, I32, I64] {
+        let ishl = ishl.bind(vector(*ty, sse_vector_size));
+        let bitcast = bitcast.bind(vector(I64, sse_vector_size));
+        narrow.legalize(
+            def!(a = ishl(x, y)),
+            vec![def!(b = bitcast(y)), def!(a = x86_psll(x, b))],
+        );
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");
diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
index b7f223eb27..6e3859d848 100644
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -320,6 +320,15 @@ pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
 /// store the result in xmm1 (SSE2).
 pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
 
+/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];
+
+/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2];
+
+/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3];
+
 /// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2).
 pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8];
 
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
new file mode 100644
index 0000000000..5cfb4375d7
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
@@ -0,0 +1,21 @@
+test binemit
+set enable_simd
+target x86_64 skylake
+
+function %ishl_i16x8(i16x8, i64x2) -> i16x8 {
+ebb0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]):
+[-, %xmm2]  v2 = x86_psll v0, v1     ; bin: 66 0f f1 d1
+            return v2
+}
+
+function %ishl_i32x4(i32x4, i64x2) -> i32x4 {
+ebb0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]):
+[-, %xmm4]  v2 = x86_psll v0, v1      ; bin: 66 0f f2 e0
+            return v2
+}
+
+function %ishl_i64x2(i64x2, i64x2) -> i64x2 {
+ebb0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]):
+[-, %xmm6]  v2 = x86_psll v0, v1      ; bin: 66 0f f3 f3
+            return v2
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
new file mode 100644
index 0000000000..5c2893950d
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
@@ -0,0 +1,13 @@
+test legalizer
+set enable_simd
+target x86_64 skylake
+
+function %ishl_i32x4() -> i32x4 {
+ebb0:
+    v0 = iconst.i32 1
+    v1 = vconst.i32x4 [1 2 4 8]
+    v2 = ishl v1, v0
+    ; check: v3 = bitcast.i64x2 v0
+    ; nextln: v2 = x86_psll v1, v3
+    return v2
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
new file mode 100644
index 0000000000..224b3d5470
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif
@@ -0,0 +1,39 @@
+test run
+set enable_simd
+target x86_64 skylake
+
+; TODO: once available, replace all lane extraction with `icmp + all_ones`
+
+function %ishl_i32x4() -> b1 {
+ebb0:
+    v0 = iconst.i32 1
+    v1 = vconst.i32x4 [1 2 4 8]
+    v2 = ishl v1, v0
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 2
+
+    v5 = extractlane v2, 3
+    v6 = icmp_imm eq v5, 16
+
+    v7 = band v4, v6
+    return v7
+}
+; run
+
+function %ishl_too_large_i16x8() -> b1 {
+ebb0:
+    v0 = iconst.i32 17 ; note that this will shift off the end of each lane
+    v1 = vconst.i16x8 [1 2 4 8 16 32 64 128]
+    v2 = ishl v1, v0
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 0
+
+    v5 = extractlane v2, 3
+    v6 = icmp_imm eq v5, 0
+
+    v7 = band v4, v6
+    return v7
+}
+; run