Merge pull request #3569 from alexcrichton/isle-2-imul

aarch64: Migrate `imul` to ISLE
2021-11-29 17:00:24 -08:00
parent 42b23dac4a 33dba07e6b
commit 3f16cc86cb
10 changed files with 913 additions and 261 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1455,6 +1455,38 @@
            (_ Unit (emit (MInst.VecMisc op dst src size))))
        (writable_reg_to_reg dst)))

+;; Helper for emitting `MInst.VecRRRLong` instructions.
+(decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg)
+(rule (vec_rrr_long op src1 src2 high_half)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecRRRLong op dst src1 src2 high_half))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
+;; where the operation both reads and modifies the destination register.
+;;
+;; Currently this is only used for `VecRRRLongOp.Umlal*`
+(decl vec_rrrr_long (VecRRRLongOp Reg Reg Reg bool) Reg)
+(rule (vec_rrrr_long op src1 src2 src3 high_half)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_1 Unit (emit (MInst.FpuMove128 dst src1)))
+            (_2 Unit (emit (MInst.VecRRRLong op dst src2 src3 high_half))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.VecRRNarrow` instructions.
+(decl vec_rr_narrow (VecRRNarrowOp Reg bool) Reg)
+(rule (vec_rr_narrow op src high_half)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecRRNarrow op dst src high_half))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.VecRRLong` instructions.
+(decl vec_rr_long (VecRRLongOp Reg bool) Reg)
+(rule (vec_rr_long op src high_half)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecRRLong op dst src high_half))))
+        (writable_reg_to_reg dst)))
+
 ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (decl imm (Type u64) Reg)
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -189,3 +189,183 @@
 ;; vectors.
 (rule (lower (has_type (vec128 ty) (ineg x)))
      (value_reg (vec_misc (VecMisc2.Neg) (put_in_reg x) (vector_size ty))))
+
+;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; `i64` and smaller.
+(rule (lower (has_type (fits_in_64 ty) (imul x y)))
+      (value_reg (alu_rrrr (madd_op ty) (put_in_reg x) (put_in_reg y) (zero_reg))))
+
+;; `i128`.
+(rule (lower (has_type $I128 (imul x y)))
+      (let (
+          ;; Get the high/low registers for `x`.
+          (x_regs ValueRegs (put_in_regs x))
+          (x_lo Reg (value_regs_get x_regs 0))
+          (x_hi Reg (value_regs_get x_regs 1))
+
+          ;; Get the high/low registers for `y`.
+          (y_regs ValueRegs (put_in_regs y))
+          (y_lo Reg (value_regs_get y_regs 0))
+          (y_hi Reg (value_regs_get y_regs 1))
+
+          ;; 128bit mul formula:
+          ;;   dst_lo = x_lo * y_lo
+          ;;   dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
+          ;;
+          ;; We can convert the above formula into the following
+          ;; umulh   dst_hi, x_lo, y_lo
+          ;; madd    dst_hi, x_lo, y_hi, dst_hi
+          ;; madd    dst_hi, x_hi, y_lo, dst_hi
+          ;; madd    dst_lo, x_lo, y_lo, zero
+          (dst_hi1 Reg (alu_rrr (ALUOp.UMulH) x_lo y_lo))
+          (dst_hi2 Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_hi dst_hi1))
+          (dst_hi Reg (alu_rrrr (ALUOp3.MAdd64) x_hi y_lo dst_hi2))
+          (dst_lo Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_lo (zero_reg)))
+        )
+        (value_regs dst_lo dst_hi)))
+
+;; Case for i8x16, i16x8, and i32x4.
+(rule (lower (has_type (vec128 ty @ (not_i64x2)) (imul x y)))
+      (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;; Special lowering for i64x2.
+;;
+;; This I64X2 multiplication is performed with several 32-bit
+;; operations.
+;;
+;; 64-bit numbers x and y, can be represented as:
+;;   x = a + 2^32(b)
+;;   y = c + 2^32(d)
+;;
+;; A 64-bit multiplication is:
+;;   x * y = ac + 2^32(ad + bc) + 2^64(bd)
+;; note: `2^64(bd)` can be ignored, the value is too large to fit in
+;; 64 bits.
+;;
+;; This sequence implements a I64X2 multiply, where the registers
+;; `rn` and `rm` are split up into 32-bit components:
+;;   rn = |d|c|b|a|
+;;   rm = |h|g|f|e|
+;;
+;;   rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
+;;
+;;  The sequence is:
+;;  rev64 rd.4s, rm.4s
+;;  mul rd.4s, rd.4s, rn.4s
+;;  xtn tmp1.2s, rn.2d
+;;  addp rd.4s, rd.4s, rd.4s
+;;  xtn tmp2.2s, rm.2d
+;;  shll rd.2d, rd.2s, #32
+;;  umlal rd.2d, tmp2.2s, tmp1.2s
+(rule (lower (has_type $I64X2 (imul x y)))
+      (let (
+          (rn Reg (put_in_reg x))
+          (rm Reg (put_in_reg y))
+          ;; Reverse the 32-bit elements in the 64-bit words.
+          ;;   rd = |g|h|e|f|
+          (rev Reg (vec_misc (VecMisc2.Rev64) rm (VectorSize.Size32x4)))
+
+          ;; Calculate the high half components.
+          ;;   rd = |dg|ch|be|af|
+          ;;
+          ;; Note that this 32-bit multiply of the high half
+          ;; discards the bits that would overflow, same as
+          ;; if 64-bit operations were used. Also the Shll
+          ;; below would shift out the overflow bits anyway.
+          (mul Reg (vec_rrr (VecALUOp.Mul) rev rn (VectorSize.Size32x4)))
+
+          ;; Extract the low half components of rn.
+          ;;   tmp1 = |c|a|
+          (tmp1 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rn $false))
+
+          ;; Sum the respective high half components.
+          ;;   rd = |dg+ch|be+af||dg+ch|be+af|
+          (sum Reg (vec_rrr (VecALUOp.Addp) mul mul (VectorSize.Size32x4)))
+
+          ;; Extract the low half components of rm.
+          ;;   tmp2 = |g|e|
+          (tmp2 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rm $false))
+
+          ;; Shift the high half components, into the high half.
+          ;;   rd = |dg+ch << 32|be+af << 32|
+          (shift Reg (vec_rr_long (VecRRLongOp.Shll32) sum $false))
+
+          ;; Multiply the low components together, and accumulate with the high
+          ;; half.
+          ;;   rd = |rd[1] + cg|rd[0] + ae|
+          (result Reg (vec_rrrr_long (VecRRRLongOp.Umlal32) shift tmp2 tmp1 $false))
+        )
+        (value_reg result)))
+
+;; Special case for `i16x8.extmul_low_i8x16_s`.
+(rule (lower (has_type $I16X8
+                       (imul (def_inst (swiden_low x @ (value_type $I8X16)))
+                             (def_inst (swiden_low y @ (value_type $I8X16))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i16x8.extmul_high_i8x16_s`.
+(rule (lower (has_type $I16X8
+                       (imul (def_inst (swiden_high x @ (value_type $I8X16)))
+                             (def_inst (swiden_high y @ (value_type $I8X16))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i16x8.extmul_low_i8x16_u`.
+(rule (lower (has_type $I16X8
+                       (imul (def_inst (uwiden_low x @ (value_type $I8X16)))
+                             (def_inst (uwiden_low y @ (value_type $I8X16))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i16x8.extmul_high_i8x16_u`.
+(rule (lower (has_type $I16X8
+                       (imul (def_inst (uwiden_high x @ (value_type $I8X16)))
+                             (def_inst (uwiden_high y @ (value_type $I8X16))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i32x4.extmul_low_i16x8_s`.
+(rule (lower (has_type $I32X4
+                       (imul (def_inst (swiden_low x @ (value_type $I16X8)))
+                             (def_inst (swiden_low y @ (value_type $I16X8))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i32x4.extmul_high_i16x8_s`.
+(rule (lower (has_type $I32X4
+                       (imul (def_inst (swiden_high x @ (value_type $I16X8)))
+                             (def_inst (swiden_high y @ (value_type $I16X8))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i32x4.extmul_low_i16x8_u`.
+(rule (lower (has_type $I32X4
+                       (imul (def_inst (uwiden_low x @ (value_type $I16X8)))
+                             (def_inst (uwiden_low y @ (value_type $I16X8))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i32x4.extmul_high_i16x8_u`.
+(rule (lower (has_type $I32X4
+                       (imul (def_inst (uwiden_high x @ (value_type $I16X8)))
+                             (def_inst (uwiden_high y @ (value_type $I16X8))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i64x2.extmul_low_i32x4_s`.
+(rule (lower (has_type $I64X2
+                       (imul (def_inst (swiden_low x @ (value_type $I32X4)))
+                             (def_inst (swiden_low y @ (value_type $I32X4))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i64x2.extmul_high_i32x4_s`.
+(rule (lower (has_type $I64X2
+                       (imul (def_inst (swiden_high x @ (value_type $I32X4)))
+                             (def_inst (swiden_high y @ (value_type $I32X4))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $true)))
+
+;; Special case for `i64x2.extmul_low_i32x4_u`.
+(rule (lower (has_type $I64X2
+                       (imul (def_inst (uwiden_low x @ (value_type $I32X4)))
+                             (def_inst (uwiden_low y @ (value_type $I32X4))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $false)))
+
+;; Special case for `i64x2.extmul_high_i32x4_u`.
+(rule (lower (has_type $I64X2
+                       (imul (def_inst (uwiden_high x @ (value_type $I32X4)))
+                             (def_inst (uwiden_high y @ (value_type $I32X4))))))
+      (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $true)))
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -1285,153 +1285,6 @@ pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
    None
 }

-/// Pattern match an extending vector multiplication.
-/// Returns a tuple of the opcode to use, the two input registers and whether
-/// it's the 'high half' version of the instruction.
-pub(crate) fn match_vec_long_mul<C: LowerCtx<I = Inst>>(
-    c: &mut C,
-    insn: IRInst,
-    ext_op: Opcode,
-) -> Option<(VecRRRLongOp, regalloc::Reg, regalloc::Reg, bool)> {
-    let inputs = insn_inputs(c, insn);
-    if let Some(lhs) = maybe_input_insn(c, inputs[0], ext_op) {
-        if let Some(rhs) = maybe_input_insn(c, inputs[1], ext_op) {
-            let lhs_input = insn_inputs(c, lhs)[0];
-            let rhs_input = insn_inputs(c, rhs)[0];
-            let rn = put_input_in_reg(c, lhs_input, NarrowValueMode::None);
-            let rm = put_input_in_reg(c, rhs_input, NarrowValueMode::None);
-            let lane_type = c.output_ty(insn, 0).lane_type();
-            match (lane_type, ext_op) {
-                (I16, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull8, rn, rm, false)),
-                (I16, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull8, rn, rm, true)),
-                (I16, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull8, rn, rm, false)),
-                (I16, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull8, rn, rm, true)),
-                (I32, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull16, rn, rm, false)),
-                (I32, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull16, rn, rm, true)),
-                (I32, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull16, rn, rm, false)),
-                (I32, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull16, rn, rm, true)),
-                (I64, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull32, rn, rm, false)),
-                (I64, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull32, rn, rm, true)),
-                (I64, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull32, rn, rm, false)),
-                (I64, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull32, rn, rm, true)),
-                _ => {}
-            };
-        }
-    }
-    None
-}
-
-pub(crate) fn lower_i64x2_mul<C: LowerCtx<I = Inst>>(c: &mut C, insn: IRInst) {
-    let inputs = insn_inputs(c, insn);
-    let outputs = insn_outputs(c, insn);
-    let rd = get_output_reg(c, outputs[0]).regs()[0];
-    let rn = put_input_in_regs(c, inputs[0]).regs()[0];
-    let rm = put_input_in_regs(c, inputs[1]).regs()[0];
-
-    let tmp1 = c.alloc_tmp(I64X2).only_reg().unwrap();
-    let tmp2 = c.alloc_tmp(I64X2).only_reg().unwrap();
-
-    // This I64X2 multiplication is performed with several 32-bit
-    // operations.
-
-    // 64-bit numbers x and y, can be represented as:
-    //   x = a + 2^32(b)
-    //   y = c + 2^32(d)
-
-    // A 64-bit multiplication is:
-    //   x * y = ac + 2^32(ad + bc) + 2^64(bd)
-    // note: `2^64(bd)` can be ignored, the value is too large to fit in
-    // 64 bits.
-
-    // This sequence implements a I64X2 multiply, where the registers
-    // `rn` and `rm` are split up into 32-bit components:
-    //   rn = |d|c|b|a|
-    //   rm = |h|g|f|e|
-    //
-    //   rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
-    //
-    //  The sequence is:
-    //  rev64 rd.4s, rm.4s
-    //  mul rd.4s, rd.4s, rn.4s
-    //  xtn tmp1.2s, rn.2d
-    //  addp rd.4s, rd.4s, rd.4s
-    //  xtn tmp2.2s, rm.2d
-    //  shll rd.2d, rd.2s, #32
-    //  umlal rd.2d, tmp2.2s, tmp1.2s
-
-    // Reverse the 32-bit elements in the 64-bit words.
-    //   rd = |g|h|e|f|
-    c.emit(Inst::VecMisc {
-        op: VecMisc2::Rev64,
-        rd,
-        rn: rm,
-        size: VectorSize::Size32x4,
-    });
-
-    // Calculate the high half components.
-    //   rd = |dg|ch|be|af|
-    //
-    // Note that this 32-bit multiply of the high half
-    // discards the bits that would overflow, same as
-    // if 64-bit operations were used. Also the Shll
-    // below would shift out the overflow bits anyway.
-    c.emit(Inst::VecRRR {
-        alu_op: VecALUOp::Mul,
-        rd,
-        rn: rd.to_reg(),
-        rm: rn,
-        size: VectorSize::Size32x4,
-    });
-
-    // Extract the low half components of rn.
-    //   tmp1 = |c|a|
-    c.emit(Inst::VecRRNarrow {
-        op: VecRRNarrowOp::Xtn64,
-        rd: tmp1,
-        rn,
-        high_half: false,
-    });
-
-    // Sum the respective high half components.
-    //   rd = |dg+ch|be+af||dg+ch|be+af|
-    c.emit(Inst::VecRRR {
-        alu_op: VecALUOp::Addp,
-        rd,
-        rn: rd.to_reg(),
-        rm: rd.to_reg(),
-        size: VectorSize::Size32x4,
-    });
-
-    // Extract the low half components of rm.
-    //   tmp2 = |g|e|
-    c.emit(Inst::VecRRNarrow {
-        op: VecRRNarrowOp::Xtn64,
-        rd: tmp2,
-        rn: rm,
-        high_half: false,
-    });
-
-    // Shift the high half components, into the high half.
-    //   rd = |dg+ch << 32|be+af << 32|
-    c.emit(Inst::VecRRLong {
-        op: VecRRLongOp::Shll32,
-        rd,
-        rn: rd.to_reg(),
-        high_half: false,
-    });
-
-    // Multiply the low components together, and accumulate with the high
-    // half.
-    //   rd = |rd[1] + cg|rd[0] + ae|
-    c.emit(Inst::VecRRRLong {
-        alu_op: VecRRRLongOp::Umlal32,
-        rd,
-        rn: tmp2.to_reg(),
-        rm: tmp1.to_reg(),
-        high_half: false,
-    });
-}
-
 /// Specifies what [lower_icmp] should do when lowering
 #[derive(Debug, Clone, PartialEq)]
 pub(crate) enum IcmpOutput {
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
-src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823
-src/isa/aarch64/inst.isle c90a42ae8e0d932d200c6150777fa6a8b6d113f2e9ef24a9328669d9d9bebf137004e70eaef91b9be1880eb71e5b1cb28f84d53e2a11c0c45db3c57f5c32441e
-src/isa/aarch64/lower.isle 5b9b2423ff641cb9bc3b297a0fba87813421200de7b83c8d575e52e643439971fb912be8d41043ecbe65107678451a74dfec0012df13dfca34bbfed4857504af
+src/prelude.isle fc3ca134da0df8e7309db0f6969c8f1db85ca7b7590d2e43552ef3134b9a55bd358a93e3aadf79d5c31d3fc95ce5c9c52f8313183c688259c027ee494913869c
+src/isa/aarch64/inst.isle 30c88514c23dfda849aa4a98b981b52b569994cdf3424a93d77429246ebce8c45575a76387ae2f3e4901ba6b21c846a419231da413f2df6c5dcea681eab6bf0c
+src/isa/aarch64/lower.isle 97392236095b99e93c97732b2af0778aba409f81da22b6879cf6e8f2513f0de5b3017bfa072dc60e7f6bf21aac91a5153133c01b041fb174fab0680d2fd4886c
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
@@ -34,6 +34,7 @@ pub trait Context {
    fn fits_in_32(&mut self, arg0: Type) -> Option<Type>;
    fn fits_in_64(&mut self, arg0: Type) -> Option<Type>;
    fn vec128(&mut self, arg0: Type) -> Option<Type>;
+    fn not_i64x2(&mut self, arg0: Type) -> Option<()>;
    fn value_list_slice(&mut self, arg0: ValueList) -> ValueSlice;
    fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice);
    fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice);
@@ -63,13 +64,13 @@ pub trait Context {
    fn load_constant64_full(&mut self, arg0: u64) -> Reg;
 }

-/// Internal type ProducesFlags: defined at src/prelude.isle line 234.
+/// Internal type ProducesFlags: defined at src/prelude.isle line 238.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
    ProducesFlags { inst: MInst, result: Reg },
 }

-/// Internal type ConsumesFlags: defined at src/prelude.isle line 237.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 241.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
    ConsumesFlags { inst: MInst, result: Reg },
@@ -983,7 +984,7 @@ pub fn constructor_with_flags<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 247.
+            // Rule at src/prelude.isle line 251.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1011,7 +1012,7 @@ pub fn constructor_with_flags_1<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 255.
+            // Rule at src/prelude.isle line 259.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            return Some(pattern3_1);
@@ -1045,7 +1046,7 @@ pub fn constructor_with_flags_2<C: Context>(
                result: pattern5_1,
            } = pattern4_0
            {
-                // Rule at src/prelude.isle line 265.
+                // Rule at src/prelude.isle line 269.
                let expr0_0 = C::emit(ctx, &pattern1_0);
                let expr1_0 = C::emit(ctx, &pattern3_0);
                let expr2_0 = C::emit(ctx, &pattern5_0);
@@ -1452,31 +1453,140 @@ pub fn constructor_vec_misc<C: Context>(
    return Some(expr4_0);
 }

+// Generated as internal constructor for term vec_rrr_long.
+pub fn constructor_vec_rrr_long<C: Context>(
+    ctx: &mut C,
+    arg0: &VecRRRLongOp,
+    arg1: Reg,
+    arg2: Reg,
+    arg3: bool,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    let pattern3_0 = arg3;
+    // Rule at src/isa/aarch64/inst.isle line 1460.
+    let expr0_0: Type = I8X16;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::VecRRRLong {
+        alu_op: pattern0_0.clone(),
+        rd: expr1_0,
+        rn: pattern1_0,
+        rm: pattern2_0,
+        high_half: pattern3_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
+// Generated as internal constructor for term vec_rrrr_long.
+pub fn constructor_vec_rrrr_long<C: Context>(
+    ctx: &mut C,
+    arg0: &VecRRRLongOp,
+    arg1: Reg,
+    arg2: Reg,
+    arg3: Reg,
+    arg4: bool,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    let pattern3_0 = arg3;
+    let pattern4_0 = arg4;
+    // Rule at src/isa/aarch64/inst.isle line 1470.
+    let expr0_0: Type = I8X16;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::FpuMove128 {
+        rd: expr1_0,
+        rn: pattern1_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = MInst::VecRRRLong {
+        alu_op: pattern0_0.clone(),
+        rd: expr1_0,
+        rn: pattern2_0,
+        rm: pattern3_0,
+        high_half: pattern4_0,
+    };
+    let expr5_0 = C::emit(ctx, &expr4_0);
+    let expr6_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr6_0);
+}
+
+// Generated as internal constructor for term vec_rr_narrow.
+pub fn constructor_vec_rr_narrow<C: Context>(
+    ctx: &mut C,
+    arg0: &VecRRNarrowOp,
+    arg1: Reg,
+    arg2: bool,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    // Rule at src/isa/aarch64/inst.isle line 1478.
+    let expr0_0: Type = I8X16;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::VecRRNarrow {
+        op: pattern0_0.clone(),
+        rd: expr1_0,
+        rn: pattern1_0,
+        high_half: pattern2_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
+// Generated as internal constructor for term vec_rr_long.
+pub fn constructor_vec_rr_long<C: Context>(
+    ctx: &mut C,
+    arg0: &VecRRLongOp,
+    arg1: Reg,
+    arg2: bool,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    // Rule at src/isa/aarch64/inst.isle line 1485.
+    let expr0_0: Type = I8X16;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::VecRRLong {
+        op: pattern0_0.clone(),
+        rd: expr1_0,
+        rn: pattern1_0,
+        high_half: pattern2_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
 // Generated as internal constructor for term imm.
 pub fn constructor_imm<C: Context>(ctx: &mut C, arg0: Type, arg1: u64) -> Option<Reg> {
    let pattern0_0 = arg0;
    if let Some(pattern1_0) = C::integral_ty(ctx, pattern0_0) {
        let pattern2_0 = arg1;
        if let Some(pattern3_0) = C::imm_logic_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1471.
+            // Rule at src/isa/aarch64/inst.isle line 1503.
            let expr0_0 = ALUOp::Orr64;
            let expr1_0 = C::zero_reg(ctx);
            let expr2_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, expr1_0, pattern3_0)?;
            return Some(expr2_0);
        }
        if let Some(pattern3_0) = C::move_wide_const_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1463.
+            // Rule at src/isa/aarch64/inst.isle line 1495.
            let expr0_0 = OperandSize::Size64;
            let expr1_0 = constructor_movz(ctx, pattern3_0, &expr0_0)?;
            return Some(expr1_0);
        }
        if let Some(pattern3_0) = C::move_wide_const_from_negated_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1467.
+            // Rule at src/isa/aarch64/inst.isle line 1499.
            let expr0_0 = OperandSize::Size64;
            let expr1_0 = constructor_movn(ctx, pattern3_0, &expr0_0)?;
            return Some(expr1_0);
        }
-        // Rule at src/isa/aarch64/inst.isle line 1478.
+        // Rule at src/isa/aarch64/inst.isle line 1510.
        let expr0_0 = C::load_constant64_full(ctx, pattern2_0);
        return Some(expr0_0);
    }
@@ -1532,10 +1642,535 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
                        let expr12_0 = constructor_with_flags(ctx, &expr10_0, &expr11_0)?;
                        return Some(expr12_0);
                    }
+                    &Opcode::Imul => {
+                        let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
+                        // Rule at src/isa/aarch64/lower.isle line 200.
+                        let expr0_0 = C::put_in_regs(ctx, pattern7_0);
+                        let expr1_0: usize = 0;
+                        let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);
+                        let expr3_0: usize = 1;
+                        let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0);
+                        let expr5_0 = C::put_in_regs(ctx, pattern7_1);
+                        let expr6_0: usize = 0;
+                        let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0);
+                        let expr8_0: usize = 1;
+                        let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0);
+                        let expr10_0 = ALUOp::UMulH;
+                        let expr11_0 = constructor_alu_rrr(ctx, &expr10_0, expr2_0, expr7_0)?;
+                        let expr12_0 = ALUOp3::MAdd64;
+                        let expr13_0 =
+                            constructor_alu_rrrr(ctx, &expr12_0, expr2_0, expr9_0, expr11_0)?;
+                        let expr14_0 = ALUOp3::MAdd64;
+                        let expr15_0 =
+                            constructor_alu_rrrr(ctx, &expr14_0, expr4_0, expr7_0, expr13_0)?;
+                        let expr16_0 = ALUOp3::MAdd64;
+                        let expr17_0 = C::zero_reg(ctx);
+                        let expr18_0 =
+                            constructor_alu_rrrr(ctx, &expr16_0, expr2_0, expr7_0, expr17_0)?;
+                        let expr19_0 = C::value_regs(ctx, expr18_0, expr15_0);
+                        return Some(expr19_0);
+                    }
                    _ => {}
                }
            }
        }
+        if pattern2_0 == I16X8 {
+            let pattern4_0 = C::inst_data(ctx, pattern0_0);
+            if let &InstructionData::Binary {
+                opcode: ref pattern5_0,
+                args: ref pattern5_1,
+            } = &pattern4_0
+            {
+                if let &Opcode::Imul = &pattern5_0 {
+                    let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
+                    if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) {
+                        let pattern9_0 = C::inst_data(ctx, pattern8_0);
+                        if let &InstructionData::Unary {
+                            opcode: ref pattern10_0,
+                            arg: pattern10_1,
+                        } = &pattern9_0
+                        {
+                            match &pattern10_0 {
+                                &Opcode::SwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I8X16 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I8X16 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 302.
+                                                        let expr0_0 = VecRRRLongOp::Smull8;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::SwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I8X16 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I8X16 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 308.
+                                                        let expr0_0 = VecRRRLongOp::Smull8;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I8X16 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I8X16 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 314.
+                                                        let expr0_0 = VecRRRLongOp::Umull8;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I8X16 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I8X16 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 320.
+                                                        let expr0_0 = VecRRRLongOp::Umull8;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                _ => {}
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        if pattern2_0 == I32X4 {
+            let pattern4_0 = C::inst_data(ctx, pattern0_0);
+            if let &InstructionData::Binary {
+                opcode: ref pattern5_0,
+                args: ref pattern5_1,
+            } = &pattern4_0
+            {
+                if let &Opcode::Imul = &pattern5_0 {
+                    let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
+                    if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) {
+                        let pattern9_0 = C::inst_data(ctx, pattern8_0);
+                        if let &InstructionData::Unary {
+                            opcode: ref pattern10_0,
+                            arg: pattern10_1,
+                        } = &pattern9_0
+                        {
+                            match &pattern10_0 {
+                                &Opcode::SwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I16X8 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I16X8 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 326.
+                                                        let expr0_0 = VecRRRLongOp::Smull16;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::SwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I16X8 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I16X8 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 332.
+                                                        let expr0_0 = VecRRRLongOp::Smull16;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I16X8 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I16X8 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 338.
+                                                        let expr0_0 = VecRRRLongOp::Umull16;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I16X8 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I16X8 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 344.
+                                                        let expr0_0 = VecRRRLongOp::Umull16;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                _ => {}
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        if pattern2_0 == I64X2 {
+            let pattern4_0 = C::inst_data(ctx, pattern0_0);
+            if let &InstructionData::Binary {
+                opcode: ref pattern5_0,
+                args: ref pattern5_1,
+            } = &pattern4_0
+            {
+                if let &Opcode::Imul = &pattern5_0 {
+                    let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
+                    if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) {
+                        let pattern9_0 = C::inst_data(ctx, pattern8_0);
+                        if let &InstructionData::Unary {
+                            opcode: ref pattern10_0,
+                            arg: pattern10_1,
+                        } = &pattern9_0
+                        {
+                            match &pattern10_0 {
+                                &Opcode::SwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I32X4 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I32X4 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 350.
+                                                        let expr0_0 = VecRRRLongOp::Smull32;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::SwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I32X4 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::SwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I32X4 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 356.
+                                                        let expr0_0 = VecRRRLongOp::Smull32;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenLow => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I32X4 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenLow = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I32X4 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 362.
+                                                        let expr0_0 = VecRRRLongOp::Umull32;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = false;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                &Opcode::UwidenHigh => {
+                                    let pattern12_0 = C::value_type(ctx, pattern10_1);
+                                    if pattern12_0 == I32X4 {
+                                        if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) {
+                                            let pattern15_0 = C::inst_data(ctx, pattern14_0);
+                                            if let &InstructionData::Unary {
+                                                opcode: ref pattern16_0,
+                                                arg: pattern16_1,
+                                            } = &pattern15_0
+                                            {
+                                                if let &Opcode::UwidenHigh = &pattern16_0 {
+                                                    let pattern18_0 =
+                                                        C::value_type(ctx, pattern16_1);
+                                                    if pattern18_0 == I32X4 {
+                                                        // Rule at src/isa/aarch64/lower.isle line 368.
+                                                        let expr0_0 = VecRRRLongOp::Umull32;
+                                                        let expr1_0 =
+                                                            C::put_in_reg(ctx, pattern10_1);
+                                                        let expr2_0 =
+                                                            C::put_in_reg(ctx, pattern16_1);
+                                                        let expr3_0: bool = true;
+                                                        let expr4_0 = constructor_vec_rrr_long(
+                                                            ctx, &expr0_0, expr1_0, expr2_0,
+                                                            expr3_0,
+                                                        )?;
+                                                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                                                        return Some(expr5_0);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                _ => {}
+                            }
+                        }
+                    }
+                    // Rule at src/isa/aarch64/lower.isle line 261.
+                    let expr0_0 = C::put_in_reg(ctx, pattern7_0);
+                    let expr1_0 = C::put_in_reg(ctx, pattern7_1);
+                    let expr2_0 = VecMisc2::Rev64;
+                    let expr3_0 = VectorSize::Size32x4;
+                    let expr4_0 = constructor_vec_misc(ctx, &expr2_0, expr1_0, &expr3_0)?;
+                    let expr5_0 = VecALUOp::Mul;
+                    let expr6_0 = VectorSize::Size32x4;
+                    let expr7_0 = constructor_vec_rrr(ctx, &expr5_0, expr4_0, expr0_0, &expr6_0)?;
+                    let expr8_0 = VecRRNarrowOp::Xtn64;
+                    let expr9_0: bool = false;
+                    let expr10_0 = constructor_vec_rr_narrow(ctx, &expr8_0, expr0_0, expr9_0)?;
+                    let expr11_0 = VecALUOp::Addp;
+                    let expr12_0 = VectorSize::Size32x4;
+                    let expr13_0 =
+                        constructor_vec_rrr(ctx, &expr11_0, expr7_0, expr7_0, &expr12_0)?;
+                    let expr14_0 = VecRRNarrowOp::Xtn64;
+                    let expr15_0: bool = false;
+                    let expr16_0 = constructor_vec_rr_narrow(ctx, &expr14_0, expr1_0, expr15_0)?;
+                    let expr17_0 = VecRRLongOp::Shll32;
+                    let expr18_0: bool = false;
+                    let expr19_0 = constructor_vec_rr_long(ctx, &expr17_0, expr13_0, expr18_0)?;
+                    let expr20_0 = VecRRRLongOp::Umlal32;
+                    let expr21_0: bool = false;
+                    let expr22_0 = constructor_vec_rrrr_long(
+                        ctx, &expr20_0, expr19_0, expr16_0, expr10_0, expr21_0,
+                    )?;
+                    let expr23_0 = C::value_reg(ctx, expr22_0);
+                    return Some(expr23_0);
+                }
+            }
+        }
        let pattern3_0 = C::inst_data(ctx, pattern0_0);
        match &pattern3_0 {
            &InstructionData::NullAry {
@@ -2029,6 +2664,19 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
                            let expr4_0 = C::value_reg(ctx, expr3_0);
                            return Some(expr4_0);
                        }
+                        &Opcode::Imul => {
+                            let (pattern7_0, pattern7_1) =
+                                C::unpack_value_array_2(ctx, &pattern5_1);
+                            // Rule at src/isa/aarch64/lower.isle line 196.
+                            let expr0_0 = constructor_madd_op(ctx, pattern3_0)?;
+                            let expr1_0 = C::put_in_reg(ctx, pattern7_0);
+                            let expr2_0 = C::put_in_reg(ctx, pattern7_1);
+                            let expr3_0 = C::zero_reg(ctx);
+                            let expr4_0 =
+                                constructor_alu_rrrr(ctx, &expr0_0, expr1_0, expr2_0, expr3_0)?;
+                            let expr5_0 = C::value_reg(ctx, expr4_0);
+                            return Some(expr5_0);
+                        }
                        _ => {}
                    }
                }
@@ -2128,6 +2776,27 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
                }
                _ => {}
            }
+            if let Some(()) = C::not_i64x2(ctx, pattern3_0) {
+                let pattern5_0 = C::inst_data(ctx, pattern0_0);
+                if let &InstructionData::Binary {
+                    opcode: ref pattern6_0,
+                    args: ref pattern6_1,
+                } = &pattern5_0
+                {
+                    if let &Opcode::Imul = &pattern6_0 {
+                        let (pattern8_0, pattern8_1) = C::unpack_value_array_2(ctx, &pattern6_1);
+                        // Rule at src/isa/aarch64/lower.isle line 229.
+                        let expr0_0 = VecALUOp::Mul;
+                        let expr1_0 = C::put_in_reg(ctx, pattern8_0);
+                        let expr2_0 = C::put_in_reg(ctx, pattern8_1);
+                        let expr3_0 = constructor_vector_size(ctx, pattern3_0)?;
+                        let expr4_0 =
+                            constructor_vec_rrr(ctx, &expr0_0, expr1_0, expr2_0, &expr3_0)?;
+                        let expr5_0 = C::value_reg(ctx, expr4_0);
+                        return Some(expr5_0);
+                    }
+                }
+            }
        }
    }
    return None;
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -71,102 +71,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

        Opcode::Ineg => implemented_in_isle(ctx),

-        Opcode::Imul => {
-            let ty = ty.unwrap();
-            if ty == I128 {
-                let lhs = put_input_in_regs(ctx, inputs[0]);
-                let rhs = put_input_in_regs(ctx, inputs[1]);
-                let dst = get_output_reg(ctx, outputs[0]);
-                assert_eq!(lhs.len(), 2);
-                assert_eq!(rhs.len(), 2);
-                assert_eq!(dst.len(), 2);
-
-                // 128bit mul formula:
-                //   dst_lo = lhs_lo * rhs_lo
-                //   dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
-                //
-                // We can convert the above formula into the following
-                // umulh   dst_hi, lhs_lo, rhs_lo
-                // madd    dst_hi, lhs_lo, rhs_hi, dst_hi
-                // madd    dst_hi, lhs_hi, rhs_lo, dst_hi
-                // mul     dst_lo, lhs_lo, rhs_lo
-
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::UMulH,
-                    rd: dst.regs()[1],
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[0],
-                });
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MAdd64,
-                    rd: dst.regs()[1],
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[1],
-                    ra: dst.regs()[1].to_reg(),
-                });
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MAdd64,
-                    rd: dst.regs()[1],
-                    rn: lhs.regs()[1],
-                    rm: rhs.regs()[0],
-                    ra: dst.regs()[1].to_reg(),
-                });
-                ctx.emit(Inst::AluRRRR {
-                    alu_op: ALUOp3::MAdd64,
-                    rd: dst.regs()[0],
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[0],
-                    ra: zero_reg(),
-                });
-            } else if ty.is_vector() {
-                for ext_op in &[
-                    Opcode::SwidenLow,
-                    Opcode::SwidenHigh,
-                    Opcode::UwidenLow,
-                    Opcode::UwidenHigh,
-                ] {
-                    if let Some((alu_op, rn, rm, high_half)) =
-                        match_vec_long_mul(ctx, insn, *ext_op)
-                    {
-                        let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                        ctx.emit(Inst::VecRRRLong {
-                            alu_op,
-                            rd,
-                            rn,
-                            rm,
-                            high_half,
-                        });
-                        return Ok(());
-                    }
-                }
-                if ty == I64X2 {
-                    lower_i64x2_mul(ctx, insn);
-                } else {
-                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                    ctx.emit(Inst::VecRRR {
-                        alu_op: VecALUOp::Mul,
-                        rd,
-                        rn,
-                        rm,
-                        size: VectorSize::from_ty(ty),
-                    });
-                }
-            } else {
-                let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                ctx.emit(Inst::AluRRRR {
-                    alu_op,
-                    rd,
-                    rn,
-                    rm,
-                    ra: zero_reg(),
-                });
-            }
-        }
+        Opcode::Imul => implemented_in_isle(ctx),

        Opcode::Umulhi | Opcode::Smulhi => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
-src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823
+src/prelude.isle fc3ca134da0df8e7309db0f6969c8f1db85ca7b7590d2e43552ef3134b9a55bd358a93e3aadf79d5c31d3fc95ce5c9c52f8313183c688259c027ee494913869c
 src/isa/x64/inst.isle 12dc8fa43cbba6e9c5cf46a2472e2754abfe33b7fd38f80e271afa3f6c002efad7a4202c8f00ff27d5e6176de8fec97e1887d382cbd4ef06eaac177a0b5992e3
 src/isa/x64/lower.isle 333e1be62f602bb835a3cebc3299290a3d386438e9190d2db219263d974e097bfc3f1afdaac9401853806d21d548cad70bab2ffbc3b1cf5c3bebdd971a961f70
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
@@ -34,6 +34,7 @@ pub trait Context {
    fn fits_in_32(&mut self, arg0: Type) -> Option<Type>;
    fn fits_in_64(&mut self, arg0: Type) -> Option<Type>;
    fn vec128(&mut self, arg0: Type) -> Option<Type>;
+    fn not_i64x2(&mut self, arg0: Type) -> Option<()>;
    fn value_list_slice(&mut self, arg0: ValueList) -> ValueSlice;
    fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice);
    fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice);
@@ -66,13 +67,13 @@ pub trait Context {
    fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8;
 }

-/// Internal type ProducesFlags: defined at src/prelude.isle line 234.
+/// Internal type ProducesFlags: defined at src/prelude.isle line 238.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
    ProducesFlags { inst: MInst, result: Reg },
 }

-/// Internal type ConsumesFlags: defined at src/prelude.isle line 237.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 241.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
    ConsumesFlags { inst: MInst, result: Reg },
@@ -122,7 +123,7 @@ pub fn constructor_with_flags<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 247.
+            // Rule at src/prelude.isle line 251.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -150,7 +151,7 @@ pub fn constructor_with_flags_1<C: Context>(
            result: pattern3_1,
        } = pattern2_0
        {
-            // Rule at src/prelude.isle line 255.
+            // Rule at src/prelude.isle line 259.
            let expr0_0 = C::emit(ctx, &pattern1_0);
            let expr1_0 = C::emit(ctx, &pattern3_0);
            return Some(pattern3_1);
@@ -184,7 +185,7 @@ pub fn constructor_with_flags_2<C: Context>(
                result: pattern5_1,
            } = pattern4_0
            {
-                // Rule at src/prelude.isle line 265.
+                // Rule at src/prelude.isle line 269.
                let expr0_0 = C::emit(ctx, &pattern1_0);
                let expr1_0 = C::emit(ctx, &pattern3_0);
                let expr2_0 = C::emit(ctx, &pattern5_0);
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -196,6 +196,14 @@ macro_rules! isle_prelude_methods {
        fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 {
            val
        }
+
+        fn not_i64x2(&mut self, ty: Type) -> Option<()> {
+            // Matches (yielding unit) for every type except `I64X2`.
+            if ty != I64X2 {
+                return Some(());
+            }
+            None
+        }
    };
 }

--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -156,6 +156,10 @@
 (decl vec128 (Type) Type)
 (extern extractor vec128 vec128)

+;; An extractor that matches any type except `i64x2`.
+(decl not_i64x2 () Type)
+(extern extractor not_i64x2 not_i64x2)
+
 ;; Extractor to get a `ValueSlice` out of a `ValueList`.
 (decl value_list_slice (ValueSlice) ValueList)
 (extern extractor infallible value_list_slice value_list_slice)