From 33dba07e6ba39933d46c5fc71b3f479b6a108634 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Fri, 19 Nov 2021 08:43:59 -0800
Subject: [PATCH] aarch64: Migrate `imul` to ISLE

This commit migrates the `imul` clif instruction lowering for AArch64 to
ISLE. This is a relatively complicated instruction with lots of special
cases due to the simd proposal for wasm. Like x64, however, the special
casing lends itself to ISLE quite well and the lowerings here in theory
are pretty straightforward.

The main gotcha of this commit is a situation which hasn't been
encountered yet with other lowerings: the `Umlal32` instruction used in
the implementation of `i64x2.mul` is unique in the `VecRRRLongOp` class
of instructions in that it both reads and writes the destination
register (`use_mod` instead of simply `use_def`). This meant that I
needed to add another helper in ISLE for creating a `vec_rrrr_long`
instruction (despite this enum variant not actually existing) which
implicitly moves the first operand into the destination before issuing
the actual `VecRRRLong` instruction.
---
 cranelift/codegen/src/isa/aarch64/inst.isle   |  32 +
 cranelift/codegen/src/isa/aarch64/lower.isle  | 180 +++++
 cranelift/codegen/src/isa/aarch64/lower.rs    | 147 ----
 .../lower/isle/generated_code.manifest        |   6 +-
 .../isa/aarch64/lower/isle/generated_code.rs  | 687 +++++++++++++++++-
 .../codegen/src/isa/aarch64/lower_inst.rs     |  97 +--
 .../x64/lower/isle/generated_code.manifest    |   2 +-
 .../src/isa/x64/lower/isle/generated_code.rs  |  11 +-
 cranelift/codegen/src/machinst/isle.rs        |   8 +
 cranelift/codegen/src/prelude.isle            |   4 +
 10 files changed, 913 insertions(+), 261 deletions(-)

diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index ba99c03b10..12198d8db0 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1455,6 +1455,38 @@
             (_ Unit (emit (MInst.VecMisc op dst src size))))
         (writable_reg_to_reg dst)))
 
+;; Helper for emitting `MInst.VecRRRLong` instructions.
+(decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg)
+(rule (vec_rrr_long op src1 src2 high_half)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecRRRLong op dst src1 src2 high_half))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
+;; where the operation both reads and modifies the destination register.
+;;
+;; Currently this is only used for `VecRRRLongOp.Umlal*`
+(decl vec_rrrr_long (VecRRRLongOp Reg Reg Reg bool) Reg)
+(rule (vec_rrrr_long op src1 src2 src3 high_half)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_1 Unit (emit (MInst.FpuMove128 dst src1)))
+            (_2 Unit (emit (MInst.VecRRRLong op dst src2 src3 high_half))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.VecRRNarrow` instructions.
+(decl vec_rr_narrow (VecRRNarrowOp Reg bool) Reg)
+(rule (vec_rr_narrow op src high_half)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecRRNarrow op dst src high_half))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.VecRRLong` instructions.
+(decl vec_rr_long (VecRRLongOp Reg bool) Reg)
+(rule (vec_rr_long op src high_half)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecRRLong op dst src high_half))))
+        (writable_reg_to_reg dst)))
+
 ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (decl imm (Type u64) Reg)
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index de548231b1..5dce2a3741 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -189,3 +189,183 @@
 ;; vectors.
 (rule (lower (has_type (vec128 ty) (ineg x)))
       (value_reg (vec_misc (VecMisc2.Neg) (put_in_reg x) (vector_size ty))))
+
+;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; `i64` and smaller.
+(rule (lower (has_type (fits_in_64 ty) (imul x y)))
+      (value_reg (alu_rrrr (madd_op ty) (put_in_reg x) (put_in_reg y) (zero_reg))))
+
+;; `i128`.
+(rule (lower (has_type $I128 (imul x y)))
+      (let (
+          ;; Get the high/low registers for `x`.
+          (x_regs ValueRegs (put_in_regs x))
+          (x_lo Reg (value_regs_get x_regs 0))
+          (x_hi Reg (value_regs_get x_regs 1))
+
+          ;; Get the high/low registers for `y`.
+          (y_regs ValueRegs (put_in_regs y))
+          (y_lo Reg (value_regs_get y_regs 0))
+          (y_hi Reg (value_regs_get y_regs 1))
+
+          ;; 128bit mul formula:
+          ;;   dst_lo = x_lo * y_lo
+          ;;   dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
+          ;;
+          ;; We can convert the above formula into the following
+          ;; umulh   dst_hi, x_lo, y_lo
+          ;; madd    dst_hi, x_lo, y_hi, dst_hi
+          ;; madd    dst_hi, x_hi, y_lo, dst_hi
+          ;; madd    dst_lo, x_lo, y_lo, zero
+          (dst_hi1 Reg (alu_rrr (ALUOp.UMulH) x_lo y_lo))
+          (dst_hi2 Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_hi dst_hi1))
+          (dst_hi Reg (alu_rrrr (ALUOp3.MAdd64) x_hi y_lo dst_hi2))
+          (dst_lo Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_lo (zero_reg)))
+        )
+        (value_regs dst_lo dst_hi)))
+
+;; Case for i8x16, i16x8, and i32x4.
+(rule (lower (has_type (vec128 ty @ (not_i64x2)) (imul x y)))
+      (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;; Special lowering for i64x2.
+;;
+;; This I64X2 multiplication is performed with several 32-bit
+;; operations.
+;;
+;; 64-bit numbers x and y, can be represented as:
+;;   x = a + 2^32(b)
+;;   y = c + 2^32(d)
+;;
+;; A 64-bit multiplication is:
+;;   x * y = ac + 2^32(ad + bc) + 2^64(bd)
+;; note: `2^64(bd)` can be ignored, the value is too large to fit in
+;; 64 bits.
+;;
+;; This sequence implements a I64X2 multiply, where the registers
+;; `rn` and `rm` are split up into 32-bit components:
+;;   rn = |d|c|b|a|
+;;   rm = |h|g|f|e|
+;;
+;;   rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
+;;
+;;  The sequence is:
+;;  rev64 rd.4s, rm.4s
+;;  mul rd.4s, rd.4s, rn.4s
+;;  xtn tmp1.2s, rn.2d
+;;  addp rd.4s, rd.4s, rd.4s
+;;  xtn tmp2.2s, rm.2d
+;;  shll rd.2d, rd.2s, #32
+;;  umlal rd.2d, tmp2.2s, tmp1.2s
+(rule (lower (has_type $I64X2 (imul x y)))
+      (let (
+          (rn Reg (put_in_reg x))
+          (rm Reg (put_in_reg y))
+          ;; Reverse the 32-bit elements in the 64-bit words.
+          ;;   rd = |g|h|e|f|
+          (rev Reg (vec_misc (VecMisc2.Rev64) rm (VectorSize.Size32x4)))
+
+          ;; Calculate the high half components.
+          ;;   rd = |dg|ch|be|af|
+          ;;
+          ;; Note that this 32-bit multiply of the high half
+          ;; discards the bits that would overflow, same as
+          ;; if 64-bit operations were used. Also the Shll
+          ;; below would shift out the overflow bits anyway.
+          (mul Reg (vec_rrr (VecALUOp.Mul) rev rn (VectorSize.Size32x4)))
+
+          ;; Extract the low half components of rn.
+          ;;   tmp1 = |c|a|
+          (tmp1 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rn $false))
+
+          ;; Sum the respective high half components.
+          ;;   rd = |dg+ch|be+af||dg+ch|be+af|
+          (sum Reg (vec_rrr (VecALUOp.Addp) mul mul (VectorSize.Size32x4)))
+
+          ;; Extract the low half components of rm.
+          ;;   tmp2 = |g|e|
+          (tmp2 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rm $false))
+
+          ;; Shift the high half components, into the high half.
+          ;;   rd = |dg+ch << 32|be+af << 32|
+          (shift Reg (vec_rr_long (VecRRLongOp.Shll32) sum $false))
+
+          ;; Multiply the low components together, and accumulate with the high
+          ;; half.
+          ;;   rd = |rd[1] + cg|rd[0] + ae|
+          (result Reg (vec_rrrr_long (VecRRRLongOp.Umlal32) shift tmp2 tmp1 $false))
+        )
+        (value_reg result)))
+
+;; Special case for `i16x8.extmul_low_i8x16_s`.
+(rule (lower (has_type $I16X8
+                       (imul (def_inst (swiden_low x @ (value_type $I8X16)))
+                             (def_inst (swiden_low y @ (value_type $I8X16))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i16x8.extmul_high_i8x16_s`.
+(rule (lower (has_type $I16X8
+                       (imul (def_inst (swiden_high x @ (value_type $I8X16)))
+                             (def_inst (swiden_high y @ (value_type $I8X16))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i16x8.extmul_low_i8x16_u`.
+(rule (lower (has_type $I16X8
+                       (imul (def_inst (uwiden_low x @ (value_type $I8X16)))
+                             (def_inst (uwiden_low y @ (value_type $I8X16))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i16x8.extmul_high_i8x16_u`.
+(rule (lower (has_type $I16X8
+                       (imul (def_inst (uwiden_high x @ (value_type $I8X16)))
+                             (def_inst (uwiden_high y @ (value_type $I8X16))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i32x4.extmul_low_i16x8_s`.
+(rule (lower (has_type $I32X4
+                       (imul (def_inst (swiden_low x @ (value_type $I16X8)))
+                             (def_inst (swiden_low y @ (value_type $I16X8))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i32x4.extmul_high_i16x8_s`.
+(rule (lower (has_type $I32X4
+                       (imul (def_inst (swiden_high x @ (value_type $I16X8)))
+                             (def_inst (swiden_high y @ (value_type $I16X8))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i32x4.extmul_low_i16x8_u`.
+(rule (lower (has_type $I32X4
+                       (imul (def_inst (uwiden_low x @ (value_type $I16X8)))
+                             (def_inst (uwiden_low y @ (value_type $I16X8))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i32x4.extmul_high_i16x8_u`.
+(rule (lower (has_type $I32X4
+                       (imul (def_inst (uwiden_high x @ (value_type $I16X8)))
+                             (def_inst (uwiden_high y @ (value_type $I16X8))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i64x2.extmul_low_i32x4_s`.
+(rule (lower (has_type $I64X2
+                       (imul (def_inst (swiden_low x @ (value_type $I32X4)))
+                             (def_inst (swiden_low y @ (value_type $I32X4))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i64x2.extmul_high_i32x4_s`.
+(rule (lower (has_type $I64X2
+                       (imul (def_inst (swiden_high x @ (value_type $I32X4)))
+                             (def_inst (swiden_high y @ (value_type $I32X4))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i64x2.extmul_low_i32x4_u`.
+(rule (lower (has_type $I64X2
+                       (imul (def_inst (uwiden_low x @ (value_type $I32X4)))
+                             (def_inst (uwiden_low y @ (value_type $I32X4))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i64x2.extmul_high_i32x4_u`.
+(rule (lower (has_type $I64X2
+                       (imul (def_inst (uwiden_high x @ (value_type $I32X4)))
+                             (def_inst (uwiden_high y @ (value_type $I32X4))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $true)))
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 7d6a10b537..ac185daa2d 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -1285,153 +1285,6 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
     None
 }
 
-/// Pattern match an extending vector multiplication.
-/// Returns a tuple of the opcode to use, the two input registers and whether
-/// it's the 'high half' version of the instruction.
-pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
-    c: &mut C,
-    insn: IRInst,
-    ext_op: Opcode,
-) -> Option<(VecRRRLongOp, regalloc::Reg, regalloc::Reg, bool)> {
-    let inputs = insn_inputs(c, insn);
-    if let Some(lhs) = maybe_input_insn(c, inputs[0], ext_op) {
-        if let Some(rhs) = maybe_input_insn(c, inputs[1], ext_op) {
-            let lhs_input = insn_inputs(c, lhs)[0];
-            let rhs_input = insn_inputs(c, rhs)[0];
-            let rn = put_input_in_reg(c, lhs_input, NarrowValueMode::None);
-            let rm = put_input_in_reg(c, rhs_input, NarrowValueMode::None);
-            let lane_type = c.output_ty(insn, 0).lane_type();
-            match (lane_type, ext_op) {
-                (I16, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull8, rn, rm, false)),
-                (I16, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull8, rn, rm, true)),
-                (I16, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull8, rn, rm, false)),
-                (I16, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull8, rn, rm, true)),
-                (I32, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull16, rn, rm, false)),
-                (I32, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull16, rn, rm, true)),
-                (I32, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull16, rn, rm, false)),
-                (I32, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull16, rn, rm, true)),
-                (I64, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull32, rn, rm, false)),
-                (I64, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull32, rn, rm, true)),
-                (I64, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull32, rn, rm, false)),
-                (I64, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull32, rn, rm, true)),
-                _ => {}
-            };
-        }
-    }
-    None
-}
-
-pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(c: &mut C, insn: IRInst) {
-    let inputs = insn_inputs(c, insn);
-    let outputs = insn_outputs(c, insn);
-    let rd = get_output_reg(c, outputs[0]).regs()[0];
-    let rn = put_input_in_regs(c, inputs[0]).regs()[0];
-    let rm = put_input_in_regs(c, inputs[1]).regs()[0];
-
-    let tmp1 = c.alloc_tmp(I64X2).only_reg().unwrap();
-    let tmp2 = c.alloc_tmp(I64X2).only_reg().unwrap();
-
-    // This I64X2 multiplication is performed with several 32-bit
-    // operations.
-
-    // 64-bit numbers x and y, can be represented as:
-    //   x = a + 2^32(b)
-    //   y = c + 2^32(d)
-
-    // A 64-bit multiplication is:
-    //   x * y = ac + 2^32(ad + bc) + 2^64(bd)
-    // note: `2^64(bd)` can be ignored, the value is too large to fit in
-    // 64 bits.
-
-    // This sequence implements a I64X2 multiply, where the registers
-    // `rn` and `rm` are split up into 32-bit components:
-    //   rn = |d|c|b|a|
-    //   rm = |h|g|f|e|
-    //
-    //   rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
-    //
-    //  The sequence is:
-    //  rev64 rd.4s, rm.4s
-    //  mul rd.4s, rd.4s, rn.4s
-    //  xtn tmp1.2s, rn.2d
-    //  addp rd.4s, rd.4s, rd.4s
-    //  xtn tmp2.2s, rm.2d
-    //  shll rd.2d, rd.2s, #32
-    //  umlal rd.2d, tmp2.2s, tmp1.2s
-
-    // Reverse the 32-bit elements in the 64-bit words.
-    //   rd = |g|h|e|f|
-    c.emit(Inst::VecMisc {
-        op: VecMisc2::Rev64,
-        rd,
-        rn: rm,
-        size: VectorSize::Size32x4,
-    });
-
-    // Calculate the high half components.
-    //   rd = |dg|ch|be|af|
-    //
-    // Note that this 32-bit multiply of the high half
-    // discards the bits that would overflow, same as
-    // if 64-bit operations were used. Also the Shll
-    // below would shift out the overflow bits anyway.
-    c.emit(Inst::VecRRR {
-        alu_op: VecALUOp::Mul,
-        rd,
-        rn: rd.to_reg(),
-        rm: rn,
-        size: VectorSize::Size32x4,
-    });
-
-    // Extract the low half components of rn.
-    //   tmp1 = |c|a|
-    c.emit(Inst::VecRRNarrow {
-        op: VecRRNarrowOp::Xtn64,
-        rd: tmp1,
-        rn,
-        high_half: false,
-    });
-
-    // Sum the respective high half components.
-    //   rd = |dg+ch|be+af||dg+ch|be+af|
-    c.emit(Inst::VecRRR {
-        alu_op: VecALUOp::Addp,
-        rd,
-        rn: rd.to_reg(),
-        rm: rd.to_reg(),
-        size: VectorSize::Size32x4,
-    });
-
-    // Extract the low half components of rm.
-    //   tmp2 = |g|e|
-    c.emit(Inst::VecRRNarrow {
-        op: VecRRNarrowOp::Xtn64,
-        rd: tmp2,
-        rn: rm,
-        high_half: false,
-    });
-
-    // Shift the high half components, into the high half.
-    //   rd = |dg+ch << 32|be+af << 32|
-    c.emit(Inst::VecRRLong {
-        op: VecRRLongOp::Shll32,
-        rd,
-        rn: rd.to_reg(),
-        high_half: false,
-    });
-
-    // Multiply the low components together, and accumulate with the high
-    // half.
-    //   rd = |rd[1] + cg|rd[0] + ae|
-    c.emit(Inst::VecRRRLong {
-        alu_op: VecRRRLongOp::Umlal32,
-        rd,
-        rn: tmp2.to_reg(),
-        rm: tmp1.to_reg(),
-        high_half: false,
-    });
-}
-
 /// Specifies what [lower_icmp] should do when lowering
 #[derive(Debug, Clone, PartialEq)]
 pub(crate) enum IcmpOutput {
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
index c976fb5a4b..39e1f0fc04 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
-src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823
-src/isa/aarch64/inst.isle c90a42ae8e0d932d200c6150777fa6a8b6d113f2e9ef24a9328669d9d9bebf137004e70eaef91b9be1880eb71e5b1cb28f84d53e2a11c0c45db3c57f5c32441e
-src/isa/aarch64/lower.isle 5b9b2423ff641cb9bc3b297a0fba87813421200de7b83c8d575e52e643439971fb912be8d41043ecbe65107678451a74dfec0012df13dfca34bbfed4857504af
+src/prelude.isle fc3ca134da0df8e7309db0f6969c8f1db85ca7b7590d2e43552ef3134b9a55bd358a93e3aadf79d5c31d3fc95ce5c9c52f8313183c688259c027ee494913869c
+src/isa/aarch64/inst.isle 30c88514c23dfda849aa4a98b981b52b569994cdf3424a93d77429246ebce8c45575a76387ae2f3e4901ba6b21c846a419231da413f2df6c5dcea681eab6bf0c
+src/isa/aarch64/lower.isle 97392236095b99e93c97732b2af0778aba409f81da22b6879cf6e8f2513f0de5b3017bfa072dc60e7f6bf21aac91a5153133c01b041fb174fab0680d2fd4886c
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
index 816306b477..682d2a98e2 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
@@ -34,6 +34,7 @@ pub trait Context {
     fn fits_in_32(&mut self, arg0: Type) -> Option<Type>;
     fn fits_in_64(&mut self, arg0: Type) -> Option<Type>;
     fn vec128(&mut self, arg0: Type) -> Option<Type>;
+    fn not_i64x2(&mut self, arg0: Type) -> Option<()>;
     fn value_list_slice(&mut self, arg0: ValueList) -> ValueSlice;
     fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice);
     fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice);
@@ -63,13 +64,13 @@ pub trait Context {
     fn load_constant64_full(&mut self, arg0: u64) -> Reg;
 }
 
-/// Internal type ProducesFlags: defined at src/prelude.isle line 234.
+/// Internal type ProducesFlags: defined at src/prelude.isle line 238.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
     ProducesFlags { inst: MInst, result: Reg },
 }
 
-/// Internal type ConsumesFlags: defined at src/prelude.isle line 237.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 241.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
     ConsumesFlags { inst: MInst, result: Reg },
@@ -983,7 +984,7 @@ pub fn constructor_with_flags<C: Context>(
             result: pattern3_1,
         } = pattern2_0
         {
-            // Rule at src/prelude.isle line 247.
+            // Rule at src/prelude.isle line 251.
             let expr0_0 = C::emit(ctx, &pattern1_0);
             let expr1_0 = C::emit(ctx, &pattern3_0);
             let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1011,7 +1012,7 @@ pub fn constructor_with_flags_1<C: Context>(
             result: pattern3_1,
         } = pattern2_0
         {
-            // Rule at src/prelude.isle line 255.
+            // Rule at src/prelude.isle line 259.
             let expr0_0 = C::emit(ctx, &pattern1_0);
             let expr1_0 = C::emit(ctx, &pattern3_0);
             return Some(pattern3_1);
@@ -1045,7 +1046,7 @@ pub fn constructor_with_flags_2<C: Context>(
                 result: pattern5_1,
             } = pattern4_0
             {
-                // Rule at src/prelude.isle line 265.
+                // Rule at src/prelude.isle line 269.
                 let expr0_0 = C::emit(ctx, &pattern1_0);
                 let expr1_0 = C::emit(ctx, &pattern3_0);
                 let expr2_0 = C::emit(ctx, &pattern5_0);
@@ -1452,31 +1453,140 @@ pub fn constructor_vec_misc<C: Context>(
     return Some(expr4_0);
 }
 
+// Generated as internal constructor for term vec_rrr_long.
+pub fn constructor_vec_rrr_long<C: Context>(
+    ctx: &mut C,
+    arg0: &VecRRRLongOp,
+    arg1: Reg,
+    arg2: Reg,
+    arg3: bool,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    let pattern3_0 = arg3;
+    // Rule at src/isa/aarch64/inst.isle line 1460.
+    let expr0_0: Type = I8X16;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::VecRRRLong {
+        alu_op: pattern0_0.clone(),
+        rd: expr1_0,
+        rn: pattern1_0,
+        rm: pattern2_0,
+        high_half: pattern3_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
+// Generated as internal constructor for term vec_rrrr_long.
+pub fn constructor_vec_rrrr_long<C: Context>(
+    ctx: &mut C,
+    arg0: &VecRRRLongOp,
+    arg1: Reg,
+    arg2: Reg,
+    arg3: Reg,
+    arg4: bool,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    let pattern3_0 = arg3;
+    let pattern4_0 = arg4;
+    // Rule at src/isa/aarch64/inst.isle line 1470.
+    let expr0_0: Type = I8X16;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::FpuMove128 {
+        rd: expr1_0,
+        rn: pattern1_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = MInst::VecRRRLong {
+        alu_op: pattern0_0.clone(),
+        rd: expr1_0,
+        rn: pattern2_0,
+        rm: pattern3_0,
+        high_half: pattern4_0,
+    };
+    let expr5_0 = C::emit(ctx, &expr4_0);
+    let expr6_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr6_0);
+}
+
+// Generated as internal constructor for term vec_rr_narrow.
+pub fn constructor_vec_rr_narrow<C: Context>(
+    ctx: &mut C,
+    arg0: &VecRRNarrowOp,
+    arg1: Reg,
+    arg2: bool,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    // Rule at src/isa/aarch64/inst.isle line 1478.
+    let expr0_0: Type = I8X16;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::VecRRNarrow {
+        op: pattern0_0.clone(),
+        rd: expr1_0,
+        rn: pattern1_0,
+        high_half: pattern2_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
+// Generated as internal constructor for term vec_rr_long.
+pub fn constructor_vec_rr_long<C: Context>(
+    ctx: &mut C,
+    arg0: &VecRRLongOp,
+    arg1: Reg,
+    arg2: bool,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    // Rule at src/isa/aarch64/inst.isle line 1485.
+    let expr0_0: Type = I8X16;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::VecRRLong {
+        op: pattern0_0.clone(),
+        rd: expr1_0,
+        rn: pattern1_0,
+        high_half: pattern2_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
 // Generated as internal constructor for term imm.
 pub fn constructor_imm<C: Context>(ctx: &mut C, arg0: Type, arg1: u64) -> Option<Reg> {
     let pattern0_0 = arg0;
     if let Some(pattern1_0) = C::integral_ty(ctx, pattern0_0) {
         let pattern2_0 = arg1;
         if let Some(pattern3_0) = C::imm_logic_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1471.
+            // Rule at src/isa/aarch64/inst.isle line 1503.
             let expr0_0 = ALUOp::Orr64;
             let expr1_0 = C::zero_reg(ctx);
             let expr2_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, expr1_0, pattern3_0)?;
             return Some(expr2_0);
         }
         if let Some(pattern3_0) = C::move_wide_const_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1463.
+            // Rule at src/isa/aarch64/inst.isle line 1495.
             let expr0_0 = OperandSize::Size64;
             let expr1_0 = constructor_movz(ctx, pattern3_0, &expr0_0)?;
             return Some(expr1_0);
         }
         if let Some(pattern3_0) = C::move_wide_const_from_negated_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1467.
+            // Rule at src/isa/aarch64/inst.isle line 1499.
             let expr0_0 = OperandSize::Size64;
             let expr1_0 = constructor_movn(ctx, pattern3_0, &expr0_0)?;
             return Some(expr1_0);
         }
-        // Rule at src/isa/aarch64/inst.isle line 1478.
+        // Rule at src/isa/aarch64/inst.isle line 1510.
         let expr0_0 = C::load_constant64_full(ctx, pattern2_0);
         return Some(expr0_0);
     }
@@ -1532,10 +1642,535 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
                         let expr12_0 = constructor_with_flags(ctx, &expr10_0, &expr11_0)?;
                         return Some(expr12_0);
                     }
+                    &Opcode::Imul => {
+                        let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
+                        // Rule at src/isa/aarch64/lower.isle line 200.
+                        let expr0_0 = C::put_in_regs(ctx, pattern7_0);
+                        let expr1_0: usize = 0;
+                        let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);
+                        let expr3_0: usize = 1;
+                        let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0);
+                        let expr5_0 = C::put_in_regs(ctx, pattern7_1);
+                        let expr6_0: usize = 0;
+                        let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0);
+                        let expr8_0: usize = 1;
+                        let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0);
+                        let expr10_0 = ALUOp::UMulH;
+                        let expr11_0 = constructor_alu_rrr(ctx, &expr10_0, expr2_0, expr7_0)?;
+                        let expr12_0 = ALUOp3::MAdd64;
+                        let expr13_0 =
+                            constructor_alu_rrrr(ctx, &expr12_0, expr2_0, expr9_0, expr11_0)?;
+                        let expr14_0 = ALUOp3::MAdd64;
+                        let expr15_0 =
+                            constructor_alu_rrrr(ctx, &expr14_0, expr4_0, expr7_0, expr13_0)?;
+                        let expr16_0 = ALUOp3::MAdd64;
+                        let expr17_0 = C::zero_reg(ctx);
+                        let expr18_0 =
+                            constructor_alu_rrrr(ctx, &expr16_0, expr2_0, expr7_0, expr17_0)?;
+                        let expr19_0 = C::value_regs(ctx, expr18_0, expr15_0);
+                        return Some(expr19_0);
+                    }
                     _ => {}
                 }
             }
         }
+        if pattern2_0 == I16X8 {
+            let pattern4_0 = C::inst_data(ctx, pattern0_0);
+            if let &InstructionData::Binary {
+                opcode: ref pattern5_0,
+                args: ref pattern5_1,
+            } = &pattern4_0
+            {
+                if let &Opcode::Imul = &pattern5_0 {
+                    let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
+                    if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) {
+                        let pattern9_0 = C::inst_data(ctx, pattern8_0);
+                        if let &InstructionData::Unary {
+                            opcode: ref pattern10_0,
+                            arg: pattern10_1,
+                        } = &pattern9_0
+                        {
+                            match &pattern10_0 {
+                                &Opcode::SwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I8X16 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I8X16 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 302.
+                                                        let expr0_0 = VecRRRLongOp::Smull8;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::SwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I8X16 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I8X16 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 308.
+                                                        let expr0_0 = VecRRRLongOp::Smull8;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I8X16 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I8X16 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 314.
+                                                        let expr0_0 = VecRRRLongOp::Umull8;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I8X16 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I8X16 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 320.
+                                                        let expr0_0 = VecRRRLongOp::Umull8;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                _ => {}
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        if pattern2_0 == I32X4 {
+            let pattern4_0 = C::inst_data(ctx, pattern0_0);
+            if let &InstructionData::Binary {
+                opcode: ref pattern5_0,
+                args: ref pattern5_1,
+            } = &pattern4_0
+            {
+                if let &Opcode::Imul = &pattern5_0 {
+                    let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
+                    if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) {
+                        let pattern9_0 = C::inst_data(ctx, pattern8_0);
+                        if let &InstructionData::Unary {
+                            opcode: ref pattern10_0,
+                            arg: pattern10_1,
+                        } = &pattern9_0
+                        {
+                            match &pattern10_0 {
+                                &Opcode::SwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I16X8 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I16X8 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 326.
+                                                        let expr0_0 = VecRRRLongOp::Smull16;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::SwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I16X8 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I16X8 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 332.
+                                                        let expr0_0 = VecRRRLongOp::Smull16;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I16X8 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I16X8 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 338.
+                                                        let expr0_0 = VecRRRLongOp::Umull16;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I16X8 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I16X8 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 344.
+                                                        let expr0_0 = VecRRRLongOp::Umull16;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                _ => {}
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        if pattern2_0 == I64X2 {
+            let pattern4_0 = C::inst_data(ctx, pattern0_0);
+            if let &InstructionData::Binary {
+                opcode: ref pattern5_0,
+                args: ref pattern5_1,
+            } = &pattern4_0
+            {
+                if let &Opcode::Imul = &pattern5_0 {
+                    let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
+                    if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) {
+                        let pattern9_0 = C::inst_data(ctx, pattern8_0);
+                        if let &InstructionData::Unary {
+                            opcode: ref pattern10_0,
+                            arg: pattern10_1,
+                        } = &pattern9_0
+                        {
+                            match &pattern10_0 {
+                                &Opcode::SwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I32X4 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I32X4 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 350.
+                                                        let expr0_0 = VecRRRLongOp::Smull32;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::SwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I32X4 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I32X4 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 356.
+                                                        let expr0_0 = VecRRRLongOp::Smull32;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I32X4 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I32X4 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 362.
+                                                        let expr0_0 = VecRRRLongOp::Umull32;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I32X4 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I32X4 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 368.
+                                                        let expr0_0 = VecRRRLongOp::Umull32;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                _ => {}
+                            }
+                        }
+                    }
+                    // Rule at src/isa/aarch64/lower.isle line 261.
+                    let expr0_0 = C::put_in_reg(ctx, pattern7_0);
+                    let expr1_0 = C::put_in_reg(ctx, pattern7_1);
+                    let expr2_0 = VecMisc2::Rev64;
+                    let expr3_0 = VectorSize::Size32x4;
+                    let expr4_0 = constructor_vec_misc(ctx, &expr2_0, expr1_0, &expr3_0)?;
+                    let expr5_0 = VecALUOp::Mul;
+                    let expr6_0 = VectorSize::Size32x4;
+                    let expr7_0 = constructor_vec_rrr(ctx, &expr5_0, expr4_0, expr0_0, &expr6_0)?;
+                    let expr8_0 = VecRRNarrowOp::Xtn64;
+                    let expr9_0: bool = false;
+                    let expr10_0 = constructor_vec_rr_narrow(ctx, &expr8_0, expr0_0, expr9_0)?;
+                    let expr11_0 = VecALUOp::Addp;
+                    let expr12_0 = VectorSize::Size32x4;
+                    let expr13_0 =
+                        constructor_vec_rrr(ctx, &expr11_0, expr7_0, expr7_0, &expr12_0)?;
+                    let expr14_0 = VecRRNarrowOp::Xtn64;
+                    let expr15_0: bool = false;
+                    let expr16_0 = constructor_vec_rr_narrow(ctx, &expr14_0, expr1_0, expr15_0)?;
+                    let expr17_0 = VecRRLongOp::Shll32;
+                    let expr18_0: bool = false;
+                    let expr19_0 = constructor_vec_rr_long(ctx, &expr17_0, expr13_0, expr18_0)?;
+                    let expr20_0 = VecRRRLongOp::Umlal32;
+                    let expr21_0: bool = false;
+                    let expr22_0 = constructor_vec_rrrr_long(
+                        ctx, &expr20_0, expr19_0, expr16_0, expr10_0, expr21_0,
+                    )?;
+                    let expr23_0 = C::value_reg(ctx, expr22_0);
+                    return Some(expr23_0);
+                }
+            }
+        }
         let pattern3_0 = C::inst_data(ctx, pattern0_0);
         match &pattern3_0 {
             &InstructionData::NullAry {
@@ -2029,6 +2664,19 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
                             let expr4_0 = C::value_reg(ctx, expr3_0);
                             return Some(expr4_0);
                         }
+                        &Opcode::Imul => {
+                            let (pattern7_0, pattern7_1) =
+                                C::unpack_value_array_2(ctx, &pattern5_1);
+                            // Rule at src/isa/aarch64/lower.isle line 196.
+                            let expr0_0 = constructor_madd_op(ctx, pattern3_0)?;
+                            let expr1_0 = C::put_in_reg(ctx, pattern7_0);
+                            let expr2_0 = C::put_in_reg(ctx, pattern7_1);
+                            let expr3_0 = C::zero_reg(ctx);
+                            let expr4_0 =
+                                constructor_alu_rrrr(ctx, &expr0_0, expr1_0, expr2_0, expr3_0)?;
+                            let expr5_0 = C::value_reg(ctx, expr4_0);
+                            return Some(expr5_0);
+                        }
                         _ => {}
                     }
                 }
@@ -2128,6 +2776,27 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
                 }
                 _ => {}
             }
+            if let Some(()) = C::not_i64x2(ctx, pattern3_0) {
+                let pattern5_0 = C::inst_data(ctx, pattern0_0);
+                if let &InstructionData::Binary {
+                    opcode: ref pattern6_0,
+                    args: ref pattern6_1,
+                } = &pattern5_0
+                {
+                    if let &Opcode::Imul = &pattern6_0 {
+                        let (pattern8_0, pattern8_1) = C::unpack_value_array_2(ctx, &pattern6_1);
+                        // Rule at src/isa/aarch64/lower.isle line 229.
+                        let expr0_0 = VecALUOp::Mul;
+                        let expr1_0 = C::put_in_reg(ctx, pattern8_0);
+                        let expr2_0 = C::put_in_reg(ctx, pattern8_1);
+                        let expr3_0 = constructor_vector_size(ctx, pattern3_0)?;
+                        let expr4_0 =
+                            constructor_vec_rrr(ctx, &expr0_0, expr1_0, expr2_0, &expr3_0)?;
+                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                        return Some(expr5_0);
+                    }
+                }
+            }
         }
     }
     return None;
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 396a9cf6d3..b46acd59de 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -71,102 +71,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
         Opcode::Ineg => implemented_in_isle(ctx),
 
-        Opcode::Imul => {
-            let ty = ty.unwrap();
-            if ty == I128 {
-                let lhs = put_input_in_regs(ctx, inputs[0]);
-                let rhs = put_input_in_regs(ctx, inputs[1]);
-                let dst = get_output_reg(ctx, outputs[0]);
-                assert_eq!(lhs.len(), 2);
-                assert_eq!(rhs.len(), 2);
-                assert_eq!(dst.len(), 2);
-
-                // 128bit mul formula:
-                //   dst_lo = lhs_lo * rhs_lo
-                //   dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
-                //
-                // We can convert the above formula into the following
-                // umulh   dst_hi, lhs_lo, rhs_lo
-                // madd    dst_hi, lhs_lo, rhs_hi, dst_hi
-                // madd    dst_hi, lhs_hi, rhs_lo, dst_hi
-                // mul     dst_lo, lhs_lo, rhs_lo
-
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::UMulH,
-                    rd: dst.regs()[1],
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[0],
-                });
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MAdd64,
-                    rd: dst.regs()[1],
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[1],
-                    ra: dst.regs()[1].to_reg(),
-                });
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MAdd64,
-                    rd: dst.regs()[1],
-                    rn: lhs.regs()[1],
-                    rm: rhs.regs()[0],
-                    ra: dst.regs()[1].to_reg(),
-                });
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MAdd64,
-                    rd: dst.regs()[0],
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[0],
-                    ra: zero_reg(),
-                });
-            } else if ty.is_vector() {
-                for ext_op in &[
-                    Opcode::SwidenLow,
-                    Opcode::SwidenHigh,
-                    Opcode::UwidenLow,
-                    Opcode::UwidenHigh,
-                ] {
-                    if let Some((alu_op, rn, rm, high_half)) =
-                        match_vec_long_mul(ctx, insn, *ext_op)
-                    {
-                        let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                        ctx.emit(Inst::VecRRRLong {
-                            alu_op,
-                            rd,
-                            rn,
-                            rm,
-                            high_half,
-                        });
-                        return Ok(());
-                    }
-                }
-                if ty == I64X2 {
-                    lower_i64x2_mul(ctx, insn);
-                } else {
-                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                    ctx.emit(Inst::VecRRR {
-                        alu_op: VecALUOp::Mul,
-                        rd,
-                        rn,
-                        rm,
-                        size: VectorSize::from_ty(ty),
-                    });
-                }
-            } else {
-                let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                ctx.emit(Inst::AluRRRR {
-                    alu_op,
-                    rd,
-                    rn,
-                    rm,
-                    ra: zero_reg(),
-                });
-            }
-        }
+        Opcode::Imul => implemented_in_isle(ctx),
 
         Opcode::Umulhi | Opcode::Smulhi => {
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
index cf052dd533..f2c580d485 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
-src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823
+src/prelude.isle fc3ca134da0df8e7309db0f6969c8f1db85ca7b7590d2e43552ef3134b9a55bd358a93e3aadf79d5c31d3fc95ce5c9c52f8313183c688259c027ee494913869c
 src/isa/x64/inst.isle 12dc8fa43cbba6e9c5cf46a2472e2754abfe33b7fd38f80e271afa3f6c002efad7a4202c8f00ff27d5e6176de8fec97e1887d382cbd4ef06eaac177a0b5992e3
 src/isa/x64/lower.isle 333e1be62f602bb835a3cebc3299290a3d386438e9190d2db219263d974e097bfc3f1afdaac9401853806d21d548cad70bab2ffbc3b1cf5c3bebdd971a961f70
diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
index a2836b1614..d5273dae90 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
@@ -34,6 +34,7 @@ pub trait Context {
     fn fits_in_32(&mut self, arg0: Type) -> Option<Type>;
     fn fits_in_64(&mut self, arg0: Type) -> Option<Type>;
     fn vec128(&mut self, arg0: Type) -> Option<Type>;
+    fn not_i64x2(&mut self, arg0: Type) -> Option<()>;
     fn value_list_slice(&mut self, arg0: ValueList) -> ValueSlice;
     fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice);
     fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice);
@@ -66,13 +67,13 @@ pub trait Context {
     fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8;
 }
 
-/// Internal type ProducesFlags: defined at src/prelude.isle line 234.
+/// Internal type ProducesFlags: defined at src/prelude.isle line 238.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
     ProducesFlags { inst: MInst, result: Reg },
 }
 
-/// Internal type ConsumesFlags: defined at src/prelude.isle line 237.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 241.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
     ConsumesFlags { inst: MInst, result: Reg },
@@ -122,7 +123,7 @@ pub fn constructor_with_flags<C: Context>(
             result: pattern3_1,
         } = pattern2_0
         {
-            // Rule at src/prelude.isle line 247.
+            // Rule at src/prelude.isle line 251.
             let expr0_0 = C::emit(ctx, &pattern1_0);
             let expr1_0 = C::emit(ctx, &pattern3_0);
             let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -150,7 +151,7 @@ pub fn constructor_with_flags_1<C: Context>(
             result: pattern3_1,
         } = pattern2_0
         {
-            // Rule at src/prelude.isle line 255.
+            // Rule at src/prelude.isle line 259.
             let expr0_0 = C::emit(ctx, &pattern1_0);
             let expr1_0 = C::emit(ctx, &pattern3_0);
             return Some(pattern3_1);
@@ -184,7 +185,7 @@ pub fn constructor_with_flags_2<C: Context>(
                 result: pattern5_1,
             } = pattern4_0
             {
-                // Rule at src/prelude.isle line 265.
+                // Rule at src/prelude.isle line 269.
                 let expr0_0 = C::emit(ctx, &pattern1_0);
                 let expr1_0 = C::emit(ctx, &pattern3_0);
                 let expr2_0 = C::emit(ctx, &pattern5_0);
diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs
index 9098dc5df2..a93b2aff45 100644
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -196,6 +196,14 @@ macro_rules! isle_prelude_methods {
         fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 {
             val
         }
+
+        /// Succeeds (`Some(())`) for every type except `I64X2`.
+        fn not_i64x2(&mut self, ty: Type) -> Option<()> {
+            if ty != I64X2 {
+                return Some(());
+            }
+            None
+        }
     };
 }
 
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 6c2af0b581..dd4ef9530b 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -156,6 +156,10 @@
 (decl vec128 (Type) Type)
 (extern extractor vec128 vec128)
 
+;; An extractor that matches everything except i64x2
+(decl not_i64x2 () Type)
+(extern extractor not_i64x2 not_i64x2)
+
 ;; Extractor to get a `ValueSlice` out of a `ValueList`.
 (decl value_list_slice (ValueSlice) ValueList)
 (extern extractor infallible value_list_slice value_list_slice)