diff --git a/build.rs b/build.rs
index 2baea0ab4f..c331648114 100644
--- a/build.rs
+++ b/build.rs
@@ -183,6 +183,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             ("simd", "simd_address") => return false,
             ("simd", "simd_align") => return false,
             ("simd", "simd_bitwise") => return false,
+            ("simd", "simd_bit_shift") => return false,
             ("simd", "simd_boolean") => return false,
             ("simd", "simd_f32x4_cmp") => return false,
             ("simd", "simd_f64x2_cmp") => return false,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index bafe42abd0..3d08d524b4 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1352,6 +1352,8 @@ impl MachInstEmit for Inst {
                         debug_assert_ne!(I64X2, ty);
                         (0b010_01110_00_1 | enc_size << 1, 0b100111)
                     }
+                    VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),
+                    VecALUOp::Ushl => (0b011_01110_00_1 | enc_size << 1, 0b010001),
                 };
                 sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
             }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 16bec07ac3..2656c0ccfe 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -2473,6 +2473,102 @@ fn test_aarch64_binemit() {
         "mul v18.4s, v18.4s, v18.4s",
     ));
 
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Ushl,
+            rd: writable_vreg(18),
+            rn: vreg(18),
+            rm: vreg(18),
+            ty: I8X16,
+        },
+        "5246326E",
+        "ushl v18.16b, v18.16b, v18.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Ushl,
+            rd: writable_vreg(18),
+            rn: vreg(18),
+            rm: vreg(18),
+            ty: I16X8,
+        },
+        "5246726E",
+        "ushl v18.8h, v18.8h, v18.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Ushl,
+            rd: writable_vreg(18),
+            rn: vreg(1),
+            rm: vreg(21),
+            ty: I32X4,
+        },
+        "3244B56E",
+        "ushl v18.4s, v1.4s, v21.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Ushl,
+            rd: writable_vreg(5),
+            rn: vreg(7),
+            rm: vreg(19),
+            ty: I64X2,
+        },
+        "E544F36E",
+        "ushl v5.2d, v7.2d, v19.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sshl,
+            rd: writable_vreg(18),
+            rn: vreg(18),
+            rm: vreg(18),
+            ty: I8X16,
+        },
+        "5246324E",
+        "sshl v18.16b, v18.16b, v18.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sshl,
+            rd: writable_vreg(30),
+            rn: vreg(1),
+            rm: vreg(29),
+            ty: I16X8,
+        },
+        "3E447D4E",
+        "sshl v30.8h, v1.8h, v29.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sshl,
+            rd: writable_vreg(8),
+            rn: vreg(22),
+            rm: vreg(21),
+            ty: I32X4,
+        },
+        "C846B54E",
+        "sshl v8.4s, v22.4s, v21.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sshl,
+            rd: writable_vreg(8),
+            rn: vreg(22),
+            rm: vreg(2),
+            ty: I64X2,
+        },
+        "C846E24E",
+        "sshl v8.2d, v22.2d, v2.2d",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 33f0c1604c..3f1f849336 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -249,6 +249,10 @@ pub enum VecALUOp {
     Sub,
     /// Multiply
     Mul,
+    /// Signed shift left
+    Sshl,
+    /// Unsigned shift left
+    Ushl,
 }
 
 /// A Vector miscellaneous operation with two registers.
@@ -2750,6 +2754,8 @@ impl ShowWithRRU for Inst {
                 VecALUOp::Add => ("add", true, ty),
                 VecALUOp::Sub => ("sub", true, ty),
                 VecALUOp::Mul => ("mul", true, ty),
+                VecALUOp::Sshl => ("sshl", true, ty),
+                VecALUOp::Ushl => ("ushl", true, ty),
             };
             let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String =
                 if vector {
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index ba8210b875..664f2729a3 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -484,24 +484,60 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
             let ty = ty.unwrap();
             let size = InstSize::from_bits(ty_bits(ty));
-            let narrow_mode = match (op, size) {
-                (Opcode::Ishl, _) => NarrowValueMode::None,
-                (Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64,
-                (Opcode::Ushr, InstSize::Size32) => NarrowValueMode::ZeroExtend32,
-                (Opcode::Sshr, InstSize::Size64) => NarrowValueMode::SignExtend64,
-                (Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
-                _ => unreachable!(),
-            };
             let rd = get_output_reg(ctx, outputs[0]);
-            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-            let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
-            let alu_op = match op {
-                Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
-                Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
-                Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
-                _ => unreachable!(),
-            };
-            ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
+            if ty_bits(ty) < 128 {
+                let narrow_mode = match (op, size) {
+                    (Opcode::Ishl, _) => NarrowValueMode::None,
+                    (Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64,
+                    (Opcode::Ushr, InstSize::Size32) => NarrowValueMode::ZeroExtend32,
+                    (Opcode::Sshr, InstSize::Size64) => NarrowValueMode::SignExtend64,
+                    (Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
+                    _ => unreachable!(),
+                };
+                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+                let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
+                let alu_op = match op {
+                    Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
+                    Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
+                    Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
+                    _ => unreachable!(),
+                };
+                ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
+            } else {
+                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+
+                let (alu_op, is_right_shift) = match op {
+                    Opcode::Ishl => (VecALUOp::Sshl, false),
+                    Opcode::Ushr => (VecALUOp::Ushl, true),
+                    Opcode::Sshr => (VecALUOp::Sshl, true),
+                    _ => unreachable!(),
+                };
+
+                let rm = if is_right_shift {
+                    // Right shifts are implemented with a negative left shift.
+                    let tmp = ctx.alloc_tmp(RegClass::I64, I32);
+                    let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
+                    let rn = zero_reg();
+                    ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm));
+                    tmp.to_reg()
+                } else {
+                    put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
+                };
+
+                ctx.emit(Inst::VecDup {
+                    rd,
+                    rn: rm,
+                    ty: ty.lane_type(),
+                });
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op,
+                    rd,
+                    rn,
+                    rm: rd.to_reg(),
+                    ty,
+                });
+            }
         }
 
         Opcode::Rotr | Opcode::Rotl => {
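Note on the lowering above: AArch64 has no vector right-shift-by-register instruction, so for 128-bit types `ushr`/`sshr` are lowered to USHL/SSHL with a negated shift amount (the `Sub32` from `zero_reg()`), which `VecDup` then broadcasts into every lane. The sketch below is a standalone model of that behaviour plus the bit layout the emitter appears to use (`top11 << 21 | rm << 16 | bits15_10 << 10 | rn << 5 | rd`, inferred from the `enc_vec_rrr` call site); `vec_shl_word`, `ushl_lane32`, and `sshl_lane32` are illustrative names, not Cranelift APIs, and the checked values come from the emit tests in this patch.

// A standalone sketch, not part of the patch: plain Rust, no Cranelift
// dependencies, modelling the assumptions described above.

/// Assemble a USHL/SSHL instruction word, assuming enc_vec_rrr composes
/// `top11 << 21 | rm << 16 | bits15_10 << 10 | rn << 5 | rd`. `unsigned`
/// sets bit 8 of top11 (instruction bit 29, the U bit) and `size` is the
/// two-bit lane-size field (00=B, 01=H, 10=S, 11=D), matching the
/// `enc_size << 1` in the emitter.
fn vec_shl_word(unsigned: bool, size: u32, rm: u32, rn: u32, rd: u32) -> u32 {
    let u = unsigned as u32;
    let top11 = 0b010_01110_00_1 | (u << 8) | (size << 1);
    (top11 << 21) | (rm << 16) | (0b010001 << 10) | (rn << 5) | rd
}

/// Per-lane model of USHL on a 32-bit lane: a non-negative amount shifts
/// left, a negative amount shifts right, and overshift yields zero.
fn ushl_lane32(value: u32, shift: i8) -> u32 {
    let s = shift as i32;
    if s >= 0 {
        value.checked_shl(s as u32).unwrap_or(0)
    } else {
        value.checked_shr(-s as u32).unwrap_or(0)
    }
}

/// Per-lane model of SSHL: the negative direction is an arithmetic shift,
/// so overshift saturates to the sign fill (0 or -1) rather than zero.
fn sshl_lane32(value: i32, shift: i8) -> i32 {
    let s = shift as i32;
    if s >= 0 {
        value.checked_shl(s as u32).unwrap_or(0)
    } else {
        value >> (-s as u32).min(31)
    }
}

fn main() {
    // Encoding words match the emit tests above; the hex strings there are
    // little-endian byte order, so "5246326E" is the word 0x6E324652.
    assert_eq!(vec_shl_word(true, 0b00, 18, 18, 18), 0x6E32_4652); // ushl v18.16b
    assert_eq!(vec_shl_word(false, 0b00, 18, 18, 18), 0x4E32_4652); // sshl v18.16b

    // The lowering turns `ushr x, 3` into USHL by -3 and `sshr x, 3` into
    // SSHL by -3; `ishl` passes the positive amount straight through.
    assert_eq!(ushl_lane32(0x8000_0000, -3), 0x1000_0000);
    assert_eq!(sshl_lane32(-64, -3), -8);
    assert_eq!(ushl_lane32(1, 3), 8);
    println!("ok");
}

This models only the pieces the patch touches. In particular, the real USHL/SSHL read the shift amount from the low byte of each lane of the second operand, which is why the lowering must `VecDup` the (possibly negated) amount into every lane before emitting the `VecRRR`.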