From ab4be2bdd12e7b382b5df21d689f55c5075f4e48 Mon Sep 17 00:00:00 2001
From: Trevor Elliott <telliott@fastly.com>
Date: Fri, 30 Sep 2022 12:57:50 -0700
Subject: [PATCH] ISLE: Resolve overlaps in the aarch64 backend (#4988)

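---
Reviewer note (this text sits between the "---" separator and the diffstat, so
it is not part of the commit message): the hunks shown here add
`(pragma overlap_errors)` to lower.isle and attach explicit numeric priorities
to existing `(rule ...)` forms, or renumber priorities that were already
present. In every changed pair visible here, the rule's pattern and its
right-hand side are untouched; only the priority differs.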
 cranelift/codegen/src/isa/aarch64/inst.isle   |  90 ++---
 cranelift/codegen/src/isa/aarch64/lower.isle  | 338 +++++++++---------
 .../src/isa/aarch64/lower_dynamic_neon.isle   |  48 +--
 3 files changed, 239 insertions(+), 237 deletions(-)

diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index c11d3d5ff8..0ed2b14e81 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1137,7 +1137,7 @@
 
 ;; Helper for calculating the `OperandSize` corresponding to a type
 (decl operand_size (Type) OperandSize)
-(rule (operand_size (fits_in_32 _ty)) (OperandSize.Size32))
+(rule 1 (operand_size (fits_in_32 _ty)) (OperandSize.Size32))
 (rule (operand_size (fits_in_64 _ty)) (OperandSize.Size64))
 
 (type ScalarSize extern
@@ -1167,10 +1167,10 @@
 
 ;; Helper for calculating the `ScalarSize` lane type from vector type
 (decl lane_size (Type) ScalarSize)
-(rule (lane_size (multi_lane 8 _)) (ScalarSize.Size8))
-(rule (lane_size (multi_lane 16 _)) (ScalarSize.Size16))
-(rule (lane_size (multi_lane 32 _)) (ScalarSize.Size32))
-(rule (lane_size (multi_lane 64 _)) (ScalarSize.Size64))
+(rule 1 (lane_size (multi_lane 8 _)) (ScalarSize.Size8))
+(rule 1 (lane_size (multi_lane 16 _)) (ScalarSize.Size16))
+(rule 1 (lane_size (multi_lane 32 _)) (ScalarSize.Size32))
+(rule 1 (lane_size (multi_lane 64 _)) (ScalarSize.Size64))
 (rule (lane_size (dynamic_lane 8 _)) (ScalarSize.Size8))
 (rule (lane_size (dynamic_lane 16 _)) (ScalarSize.Size16))
 (rule (lane_size (dynamic_lane 32 _)) (ScalarSize.Size32))
@@ -1209,13 +1209,13 @@
 
 ;; Helper for calculating the `VectorSize` corresponding to a type
 (decl vector_size (Type) VectorSize)
-(rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
-(rule (vector_size (multi_lane 8 16)) (VectorSize.Size8x16))
-(rule (vector_size (multi_lane 16 4)) (VectorSize.Size16x4))
-(rule (vector_size (multi_lane 16 8)) (VectorSize.Size16x8))
-(rule (vector_size (multi_lane 32 2)) (VectorSize.Size32x2))
-(rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
-(rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
+(rule 1 (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
+(rule 1 (vector_size (multi_lane 8 16)) (VectorSize.Size8x16))
+(rule 1 (vector_size (multi_lane 16 4)) (VectorSize.Size16x4))
+(rule 1 (vector_size (multi_lane 16 8)) (VectorSize.Size16x8))
+(rule 1 (vector_size (multi_lane 32 2)) (VectorSize.Size32x2))
+(rule 1 (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
+(rule 1 (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
 (rule (vector_size (dynamic_lane 8 8)) (VectorSize.Size8x8))
 (rule (vector_size (dynamic_lane 8 16)) (VectorSize.Size8x16))
 (rule (vector_size (dynamic_lane 16 4)) (VectorSize.Size16x4))
@@ -2113,7 +2113,7 @@
       (let ((dst WritableReg (temp_writable_reg $I8X16))
             (_ Unit (emit (MInst.FpuMove128 dst src))))
         dst))
-(rule (fpu_move (fits_in_64 _) src)
+(rule 1 (fpu_move (fits_in_64 _) src)
       (let ((dst WritableReg (temp_writable_reg $F64))
             (_ Unit (emit (MInst.FpuMove64 dst src))))
         dst))
@@ -2245,7 +2245,7 @@
 
 ;; Helper for generating `MInst.CCmpImm` instructions.
 (decl ccmp_imm (OperandSize u8 Reg UImm5 NZCV Cond) ConsumesFlags)
-(rule (ccmp_imm size 1 rn imm nzcv cond)
+(rule 1 (ccmp_imm size 1 rn imm nzcv cond)
       (let ((dst WritableReg (temp_writable_reg $I64)))
         (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
          (MInst.CCmpImm size rn imm nzcv cond)
@@ -2700,7 +2700,7 @@
 
 ;; Weird logical-instruction immediate in ORI using zero register; to simplify,
 ;; we only match when we are zero-extending the value.
-(rule (imm (integral_ty ty) (ImmExtend.Zero) k)
+(rule 1 (imm (integral_ty ty) (ImmExtend.Zero) k)
       (if-let n (imm_logic_from_u64 ty k))
       (orr_imm ty (zero_reg) n))
 
@@ -2715,7 +2715,7 @@
 
 ;; Place a `Value` into a register, sign extending it to 32-bits
 (decl put_in_reg_sext32 (Value) Reg)
-(rule (put_in_reg_sext32 val @ (value_type (fits_in_32 ty)))
+(rule -1 (put_in_reg_sext32 val @ (value_type (fits_in_32 ty)))
       (extend val $true (ty_bits ty) 32))
 
 ;; 32/64-bit passthrough.
@@ -2724,7 +2724,7 @@
 
 ;; Place a `Value` into a register, zero extending it to 32-bits
 (decl put_in_reg_zext32 (Value) Reg)
-(rule (put_in_reg_zext32 val @ (value_type (fits_in_32 ty)))
+(rule -1 (put_in_reg_zext32 val @ (value_type (fits_in_32 ty)))
       (extend val $false (ty_bits ty) 32))
 
 ;; 32/64-bit passthrough.
@@ -2733,7 +2733,7 @@
 
 ;; Place a `Value` into a register, sign extending it to 64-bits
 (decl put_in_reg_sext64 (Value) Reg)
-(rule (put_in_reg_sext64 val @ (value_type (fits_in_32 ty)))
+(rule 1 (put_in_reg_sext64 val @ (value_type (fits_in_32 ty)))
       (extend val $true (ty_bits ty) 64))
 
 ;; 64-bit passthrough.
@@ -2741,7 +2741,7 @@
 
 ;; Place a `Value` into a register, zero extending it to 64-bits
 (decl put_in_reg_zext64 (Value) Reg)
-(rule (put_in_reg_zext64 val @ (value_type (fits_in_32 ty)))
+(rule 1 (put_in_reg_zext64 val @ (value_type (fits_in_32 ty)))
       (extend val $false (ty_bits ty) 64))
 
 ;; 64-bit passthrough.
@@ -2755,7 +2755,7 @@
         reg))
 
 (decl size_from_ty (Type) OperandSize)
-(rule (size_from_ty (fits_in_32 _ty)) (OperandSize.Size32))
+(rule 1 (size_from_ty (fits_in_32 _ty)) (OperandSize.Size32))
 (rule (size_from_ty $I64) (OperandSize.Size64))
 
 ;; Check for signed overflow. The only case is min_value / -1.
@@ -2790,14 +2790,14 @@
 (decl alu_rs_imm_logic_commutative (ALUOp Type Value Value) Reg)
 
 ;; Base case of operating on registers.
-(rule (alu_rs_imm_logic_commutative op ty x y)
+(rule -1 (alu_rs_imm_logic_commutative op ty x y)
       (alu_rrr op ty x y))
 
 ;; Special cases for when one operand is a constant.
 (rule (alu_rs_imm_logic_commutative op ty x (iconst k))
       (if-let imm (imm_logic_from_imm64 ty k))
       (alu_rr_imm_logic op ty x imm))
-(rule (alu_rs_imm_logic_commutative op ty (iconst k) x)
+(rule 1 (alu_rs_imm_logic_commutative op ty (iconst k) x)
       (if-let imm (imm_logic_from_imm64 ty k))
       (alu_rr_imm_logic op ty x imm))
 
@@ -2805,14 +2805,14 @@
 (rule (alu_rs_imm_logic_commutative op ty x (ishl y (iconst k)))
       (if-let amt (lshl_from_imm64 ty k))
       (alu_rrr_shift op ty x y amt))
-(rule (alu_rs_imm_logic_commutative op ty (ishl x (iconst k)) y)
+(rule 1 (alu_rs_imm_logic_commutative op ty (ishl x (iconst k)) y)
       (if-let amt (lshl_from_imm64 ty k))
       (alu_rrr_shift op ty y x amt))
 
 ;; Same as `alu_rs_imm_logic_commutative` above, except that it doesn't require
 ;; that the operation is commutative.
 (decl alu_rs_imm_logic (ALUOp Type Value Value) Reg)
-(rule (alu_rs_imm_logic op ty x y)
+(rule -1 (alu_rs_imm_logic op ty x y)
       (alu_rrr op ty x y))
 (rule (alu_rs_imm_logic op ty x (iconst k))
       (if-let imm (imm_logic_from_imm64 ty k))
@@ -2868,7 +2868,7 @@
 (rule (load_addr (AMode.FPOffset 0 _)) (fp_reg))
 (rule (load_addr (AMode.SPOffset 0 _)) (stack_reg))
 
-(rule (load_addr addr)
+(rule -1 (load_addr addr)
       (let ((dst WritableReg (temp_writable_reg $I64))
             (_ Unit (emit (MInst.LoadAddr dst addr))))
         dst))
@@ -3044,7 +3044,7 @@
       (mov_preg (preg_fp)))
 
 (decl aarch64_link () Reg)
-(rule (aarch64_link)
+(rule 1 (aarch64_link)
       (if (preserve_frame_pointers))
       (if (sign_return_address_disabled))
       (let ((dst WritableReg (temp_writable_reg $I64))
@@ -3081,7 +3081,7 @@
 ;; Helper for generating `fcopysign` instruction sequences.
 
 (decl fcopy_sign (Reg Reg Type) Reg)
-(rule (fcopy_sign x y (ty_scalar_float ty))
+(rule 1 (fcopy_sign x y (ty_scalar_float ty))
       (let ((dst WritableReg (temp_writable_reg $F64))
             (tmp Reg (fpu_rri (fpu_op_ri_ushr (ty_bits ty) (max_shift ty)) y))
             (_ Unit (emit (MInst.FpuRRIMod (fpu_op_ri_sli (ty_bits ty) (max_shift ty)) dst x tmp))))
@@ -3175,9 +3175,9 @@
 ;; Accepts the specific conversion op, the source register,
 ;; whether the input is signed, and finally the output type.
 (decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type) Reg)
-(rule (fpu_to_int_cvt_sat op src _ $I64)
+(rule 1 (fpu_to_int_cvt_sat op src _ $I64)
       (fpu_to_int op src))
-(rule (fpu_to_int_cvt_sat op src _ $I32)
+(rule 1 (fpu_to_int_cvt_sat op src _ $I32)
       (fpu_to_int op src))
 (rule (fpu_to_int_cvt_sat op src $false (fits_in_16 out_ty))
       (let ((result Reg (fpu_to_int op src))
@@ -3295,17 +3295,17 @@
       (vec_rrr (VecALUOp.Fcmge) rm rn (vector_size ty)))
 
 ;; Integer
-(rule (vec_cmp rn rm ty (Cond.Eq))
+(rule 1 (vec_cmp rn rm ty (Cond.Eq))
       (if (ty_vector_not_float ty))
       (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty)))
-(rule (vec_cmp rn rm ty (Cond.Ne))
+(rule 1 (vec_cmp rn rm ty (Cond.Ne))
       (if (ty_vector_not_float ty))
       (let ((tmp Reg (vec_rrr (VecALUOp.Cmeq) rn rm (vector_size ty))))
        (vec_misc (VecMisc2.Not) tmp (vector_size ty))))
-(rule (vec_cmp rn rm ty (Cond.Ge))
+(rule 1 (vec_cmp rn rm ty (Cond.Ge))
       (if (ty_vector_not_float ty))
       (vec_rrr (VecALUOp.Cmge) rn rm (vector_size ty)))
-(rule (vec_cmp rn rm ty (Cond.Gt))
+(rule 1 (vec_cmp rn rm ty (Cond.Gt))
       (if (ty_vector_not_float ty))
       (vec_rrr (VecALUOp.Cmgt) rn rm (vector_size ty)))
 (rule (vec_cmp rn rm ty (Cond.Hs))
@@ -3321,7 +3321,7 @@
 (rule (vec_cmp rn rm ty (Cond.Lt))
       (if (ty_vector_not_float ty))
       (vec_rrr (VecALUOp.Cmgt) rm rn (vector_size ty)))
-(rule (vec_cmp rn rm ty (Cond.Ls))
+(rule 1 (vec_cmp rn rm ty (Cond.Ls))
       (if (ty_vector_not_float ty))
       (vec_rrr (VecALUOp.Cmhs) rm rn (vector_size ty)))
 (rule (vec_cmp rn rm ty (Cond.Lo))
@@ -3336,7 +3336,7 @@
 ;; mov xm, vn.d[0]
 ;; cmp xm, #0
 (decl vanytrue (Reg Type) ProducesFlags)
-(rule (vanytrue src (ty_vec128 ty))
+(rule 1 (vanytrue src (ty_vec128 ty))
       (let ((src Reg (vec_rrr (VecALUOp.Umaxp) src src (VectorSize.Size32x4)))
             (src Reg (mov_from_vec src 0 (ScalarSize.Size64))))
        (cmp_imm (OperandSize.Size64) src (u8_into_imm12 0))))
@@ -3366,7 +3366,7 @@
 
 ;; Vectors.
 ;; `icmp` into flags for vectors is invalid.
-(rule (lower_icmp_into_reg cond x y in_ty @ (multi_lane _ _) _out_ty)
+(rule 1 (lower_icmp_into_reg cond x y in_ty @ (multi_lane _ _) _out_ty)
       (let ((cond Cond (cond_code cond))
             (rn Reg (put_in_reg x))
             (rm Reg (put_in_reg y)))
@@ -3380,7 +3380,7 @@
 (rule (lower_icmp_extend $I16 $false) (ExtendOp.UXTH))
 
 ;; Integers <= 64-bits.
-(rule (lower_icmp_into_reg cond rn rm in_ty out_ty)
+(rule -2 (lower_icmp_into_reg cond rn rm in_ty out_ty)
       (if (ty_int_bool_ref_scalar_64 in_ty))
       (let ((cc Cond (cond_code cond)))
        (with_flags
@@ -3391,16 +3391,16 @@
       (if (signed_cond_code cond))
       (let ((rn Reg (put_in_reg_sext32 rn)))
       (cmp_extend (operand_size ty) rn rm (lower_icmp_extend ty $true))))
-(rule (lower_icmp cond rn (imm12_from_value rm) (fits_in_16 ty))
+(rule -1 (lower_icmp cond rn (imm12_from_value rm) (fits_in_16 ty))
       (let ((rn Reg (put_in_reg_zext32 rn)))
       (cmp_imm (operand_size ty) rn rm)))
-(rule -1 (lower_icmp cond rn rm (fits_in_16 ty))
+(rule -2 (lower_icmp cond rn rm (fits_in_16 ty))
       (let ((rn Reg (put_in_reg_zext32 rn)))
       (cmp_extend (operand_size ty) rn rm (lower_icmp_extend ty $false))))
-(rule -2 (lower_icmp cond rn (imm12_from_value rm) ty)
+(rule -3 (lower_icmp cond rn (imm12_from_value rm) ty)
       (if (ty_int_bool_ref_scalar_64 ty))
       (cmp_imm (operand_size ty) rn rm))
-(rule -3 (lower_icmp cond rn rm ty)
+(rule -4 (lower_icmp cond rn rm ty)
       (if (ty_int_bool_ref_scalar_64 ty))
       (cmp (operand_size ty) rn rm))
 
@@ -3526,14 +3526,14 @@
 
 ;; Helpers for generating select instruction sequences.
 (decl lower_select (ProducesFlags Cond Type Value Value) ValueRegs)
-(rule (lower_select flags cond (ty_scalar_float ty) rn rm)
+(rule 2 (lower_select flags cond (ty_scalar_float ty) rn rm)
       (with_flags flags (fpu_csel ty cond rn rm)))
-(rule (lower_select flags cond (ty_vec128 ty) rn rm)
+(rule 3 (lower_select flags cond (ty_vec128 ty) rn rm)
       (with_flags flags (vec_csel cond rn rm)))
 (rule (lower_select flags cond ty rn rm)
       (if (ty_vec64 ty))
       (with_flags flags (fpu_csel $F64 cond rn rm)))
-(rule (lower_select flags cond $I128 rn rm)
+(rule 4 (lower_select flags cond $I128 rn rm)
       (let ((dst_lo WritableReg (temp_writable_reg $I64))
             (dst_hi WritableReg (temp_writable_reg $I64))
             (rn ValueRegs (put_in_regs rn))
@@ -3547,7 +3547,7 @@
          (MInst.CSel dst_lo cond rn_lo rm_lo)
          (MInst.CSel dst_hi cond rn_hi rm_hi)
          (value_regs dst_lo dst_hi)))))
-(rule (lower_select flags cond ty rn rm)
+(rule 1 (lower_select flags cond ty rn rm)
       (if (ty_int_bool_ref_scalar_64 ty))
       (with_flags flags (csel cond rn rm)))
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index 7d2105f923..7114a0929f 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1,5 +1,7 @@
 ;; aarch64 instruction selection and CLIF-to-MachInst lowering.
 
+(pragma overlap_errors)
+
 ;; The main lowering constructor term: takes a clif `Inst` and returns the
 ;; register(s) within which the lowered instruction's result values live.
 (decl lower (Inst) InstOutput)
@@ -37,49 +39,49 @@
 ;; `i64` and smaller
 
 ;; Base case, simply adding things in registers.
-(rule (lower (has_type (fits_in_64 ty) (iadd x y)))
+(rule -1 (lower (has_type (fits_in_64 ty) (iadd x y)))
       (add ty  x y))
 
 ;; Special cases for when one operand is an immediate that fits in 12 bits.
-(rule (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y))))
+(rule 4 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y))))
       (add_imm ty x y))
 
-(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
+(rule 5 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
       (add_imm ty y x))
 
 ;; Same as the previous special cases, except we can switch the addition to a
 ;; subtraction if the negated immediate fits in 12 bits.
-(rule (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_negated_value y))))
+(rule 2 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_negated_value y))))
       (sub_imm ty x y))
 
-(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_negated_value x) y)))
+(rule 3 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_negated_value x) y)))
       (sub_imm ty y x))
 
 ;; Special cases for when we're adding an extended register where the extending
 ;; operation can get folded into the add itself.
-(rule (lower (has_type (fits_in_64 ty) (iadd x (extended_value_from_value y))))
+(rule 0 (lower (has_type (fits_in_64 ty) (iadd x (extended_value_from_value y))))
       (add_extend ty x y))
 
-(rule (lower (has_type (fits_in_64 ty) (iadd (extended_value_from_value x) y)))
+(rule 1 (lower (has_type (fits_in_64 ty) (iadd (extended_value_from_value x) y)))
       (add_extend ty y x))
 
 ;; Special cases for when we're adding the shift of a different
 ;; register by a constant amount and the shift can get folded into the add.
-(rule (lower (has_type (fits_in_64 ty)
+(rule 7 (lower (has_type (fits_in_64 ty)
                        (iadd x (ishl y (iconst k)))))
       (if-let amt (lshl_from_imm64 ty k))
       (add_shift ty x y amt))
 
-(rule (lower (has_type (fits_in_64 ty)
+(rule 6 (lower (has_type (fits_in_64 ty)
                        (iadd (ishl x (iconst k)) y)))
       (if-let amt (lshl_from_imm64 ty k))
       (add_shift ty y x amt))
 
 ;; Fold an `iadd` and `imul` combination into a `madd` instruction.
-(rule (lower (has_type (fits_in_64 ty) (iadd x (imul y z))))
+(rule 7 (lower (has_type (fits_in_64 ty) (iadd x (imul y z))))
       (madd ty y z x))
 
-(rule (lower (has_type (fits_in_64 ty) (iadd (imul x y) z)))
+(rule 6 (lower (has_type (fits_in_64 ty) (iadd (imul x y) z)))
       (madd ty x y z))
 
 ;; Fold an `isub` and `imul` combination into a `msub` instruction.
@@ -88,11 +90,11 @@
 
 ;; vectors
 
-(rule (lower (has_type ty @ (multi_lane _ _) (iadd x y)))
+(rule -2 (lower (has_type ty @ (multi_lane _ _) (iadd x y)))
       (add_vec x y (vector_size ty)))
 
 ;; `i128`
-(rule (lower (has_type $I128 (iadd x y)))
+(rule -3 (lower (has_type $I128 (iadd x y)))
       (let
           ;; Get the high/low registers for `x`.
           ((x_regs ValueRegs x)
@@ -142,10 +144,10 @@
 (rule (lower (has_type $F64X2 (scalar_to_vector x)))
       (fpu_extend x (ScalarSize.Size64)))
 
-(rule (lower (scalar_to_vector x @ (value_type (ty_int_bool_64 _))))
+(rule -1 (lower (scalar_to_vector x @ (value_type (ty_int_bool_64 _))))
       (mov_to_fpu x (ScalarSize.Size64)))
 
-(rule (lower (scalar_to_vector x @ (value_type (int_bool_fits_in_32 _))))
+(rule -2 (lower (scalar_to_vector x @ (value_type (int_bool_fits_in_32 _))))
       (mov_to_fpu (put_in_reg_zext32 x) (ScalarSize.Size32)))
 
 ;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -183,7 +185,7 @@
 ;; mov xm, vn.d[0]
 ;; cmp xm, #0
 ;; cset xm, ne
-(rule (lower (has_type out_ty (vall_true x @ (value_type (lane_fits_in_32 ty)))))
+(rule -1 (lower (has_type out_ty (vall_true x @ (value_type (lane_fits_in_32 ty)))))
       (if (not_vec32x2 ty))
       (let ((x1 Reg (vec_lanes (VecLanesOp.Uminv) x (vector_size ty)))
             (x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64))))
@@ -214,7 +216,7 @@
       (if-let z (same_value x y))
       (uaddlp16 z))
 
-(rule (lower (has_type ty (iadd_pairwise x y)))
+(rule -1 (lower (has_type ty (iadd_pairwise x y)))
       (addp x y (vector_size ty)))
 
 ;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -222,10 +224,10 @@
 (rule (lower (has_type ty @ (multi_lane _ _) (iabs x)))
       (vec_abs x (vector_size ty)))
 
-(rule (lower (has_type $I64 (iabs x)))
+(rule 2 (lower (has_type $I64 (iabs x)))
       (abs (OperandSize.Size64) x))
 
-(rule (lower (has_type (fits_in_32 ty) (iabs x)))
+(rule 1 (lower (has_type (fits_in_32 ty) (iabs x)))
       (abs (OperandSize.Size32) (put_in_reg_sext32 x)))
 
 ;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -241,7 +243,7 @@
             (sum Reg (add_vec x y (VectorSize.Size64x2))))
        (add_vec c sum (VectorSize.Size64x2))))
 
-(rule (lower (has_type (lane_fits_in_32 ty) (avg_round x y)))
+(rule -1 (lower (has_type (lane_fits_in_32 ty) (avg_round x y)))
       (vec_rrr (VecALUOp.Urhadd) x y (vector_size ty)))
 
 ;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -251,7 +253,7 @@
 
 ;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fadd rn rm)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fadd rn rm)))
       (vec_rrr (VecALUOp.Fadd) rn rm (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (fadd rn rm)))
@@ -259,7 +261,7 @@
 
 ;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fsub rn rm)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fsub rn rm)))
       (vec_rrr (VecALUOp.Fsub) rn rm (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (fsub rn rm)))
@@ -267,7 +269,7 @@
 
 ;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fmul rn rm)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmul rn rm)))
       (vec_rrr (VecALUOp.Fmul) rn rm (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (fmul rn rm)))
@@ -275,7 +277,7 @@
 
 ;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fdiv rn rm)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fdiv rn rm)))
       (vec_rrr (VecALUOp.Fdiv) rn rm (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (fdiv rn rm)))
@@ -283,7 +285,7 @@
 
 ;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fmin rn rm)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmin rn rm)))
       (vec_rrr (VecALUOp.Fmin) rn rm (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (fmin rn rm)))
@@ -291,7 +293,7 @@
 
 ;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fmax rn rm)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmax rn rm)))
       (vec_rrr (VecALUOp.Fmax) rn rm (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (fmax rn rm)))
@@ -299,7 +301,7 @@
 
 ;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fmin_pseudo rm rn)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmin_pseudo rm rn)))
       (bsl ty (vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty)) rn rm))
 
 (rule (lower (has_type (ty_scalar_float ty) (fmin_pseudo rm rn)))
@@ -308,7 +310,7 @@
 
 ;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fmax_pseudo rm rn)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmax_pseudo rm rn)))
       (bsl ty (vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty)) rn rm))
 
 (rule (lower (has_type (ty_scalar_float ty) (fmax_pseudo rm rn)))
@@ -317,7 +319,7 @@
 
 ;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (sqrt x)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (sqrt x)))
       (vec_misc (VecMisc2.Fsqrt) x (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (sqrt x)))
@@ -325,7 +327,7 @@
 
 ;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fneg x)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fneg x)))
       (vec_misc (VecMisc2.Fneg) x (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (fneg x)))
@@ -333,7 +335,7 @@
 
 ;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fabs x)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (fabs x)))
       (vec_misc (VecMisc2.Fabs) x (vector_size ty)))
 
 (rule (lower (has_type (ty_scalar_float ty) (fabs x)))
@@ -351,7 +353,7 @@
 
 ;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (ceil x)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (ceil x)))
       (vec_misc (VecMisc2.Frintp) x (vector_size ty)))
 
 (rule (lower (has_type $F32 (ceil x)))
@@ -362,7 +364,7 @@
 
 ;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (floor x)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (floor x)))
       (vec_misc (VecMisc2.Frintm) x (vector_size ty)))
 
 (rule (lower (has_type $F32 (floor x)))
@@ -373,7 +375,7 @@
 
 ;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (trunc x)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (trunc x)))
       (vec_misc (VecMisc2.Frintz) x (vector_size ty)))
 
 (rule (lower (has_type $F32 (trunc x)))
@@ -384,7 +386,7 @@
 
 ;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (nearest x)))
+(rule -1 (lower (has_type ty @ (multi_lane _ _) (nearest x)))
       (vec_misc (VecMisc2.Frintn) x (vector_size ty)))
 
 (rule (lower (has_type $F32 (nearest x)))
@@ -398,7 +400,7 @@
 (rule (lower (has_type ty @ (multi_lane _ _) (fma x y z)))
       (vec_rrr_mod (VecALUModOp.Fmla) z x y (vector_size ty)))
 
-(rule (lower (has_type (ty_scalar_float ty) (fma x y z)))
+(rule 1 (lower (has_type (ty_scalar_float ty) (fma x y z)))
       (fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z))
 
 ;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -411,13 +413,13 @@
 (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F32))))
       (fpu_to_int_cvt (FpuToIntOp.F32ToU32) x $false $F32 out_ty))
 
-(rule (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F32))))
+(rule 1 (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F32))))
       (fpu_to_int_cvt (FpuToIntOp.F32ToU64) x $false $F32 $I64))
 
 (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F64))))
       (fpu_to_int_cvt (FpuToIntOp.F64ToU32) x $false $F64 out_ty))
 
-(rule (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F64))))
+(rule 1 (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F64))))
       (fpu_to_int_cvt (FpuToIntOp.F64ToU64) x $false $F64 $I64))
 
 ;;;; Rules for `fcvt_to_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -425,21 +427,21 @@
 (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F32))))
       (fpu_to_int_cvt (FpuToIntOp.F32ToI32) x $true $F32 out_ty))
 
-(rule (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F32))))
+(rule 1 (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F32))))
       (fpu_to_int_cvt (FpuToIntOp.F32ToI64) x $true $F32 $I64))
 
 (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F64))))
       (fpu_to_int_cvt (FpuToIntOp.F64ToI32) x $true $F64 out_ty))
 
-(rule (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F64))))
+(rule 1 (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F64))))
       (fpu_to_int_cvt (FpuToIntOp.F64ToI64) x $true $F64 $I64))
 
 ;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_uint x @ (value_type (multi_lane 32 _)))))
+(rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_uint x @ (value_type (multi_lane 32 _)))))
       (vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
 
-(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_uint x @ (value_type (multi_lane 64 _)))))
+(rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_uint x @ (value_type (multi_lane 64 _)))))
       (vec_misc (VecMisc2.Ucvtf) x (vector_size ty)))
 
 (rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
@@ -448,18 +450,18 @@
 (rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type (fits_in_32 _)))))
       (int_to_fpu (IntToFpuOp.U32ToF64) (put_in_reg_zext32 x)))
 
-(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type $I64))))
+(rule 1 (lower (has_type $F32 (fcvt_from_uint x @ (value_type $I64))))
       (int_to_fpu (IntToFpuOp.U64ToF32) x))
 
-(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type $I64))))
+(rule 1 (lower (has_type $F64 (fcvt_from_uint x @ (value_type $I64))))
       (int_to_fpu (IntToFpuOp.U64ToF64) x))
 
 ;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_sint x @ (value_type (multi_lane 32 _)))))
+(rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_sint x @ (value_type (multi_lane 32 _)))))
       (vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
 
-(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_sint x @ (value_type (multi_lane 64 _)))))
+(rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_sint x @ (value_type (multi_lane 64 _)))))
       (vec_misc (VecMisc2.Scvtf) x (vector_size ty)))
 
 (rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
@@ -468,50 +470,50 @@
 (rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type (fits_in_32 _)))))
       (int_to_fpu (IntToFpuOp.I32ToF64) (put_in_reg_sext32 x)))
 
-(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type $I64))))
+(rule 1 (lower (has_type $F32 (fcvt_from_sint x @ (value_type $I64))))
       (int_to_fpu (IntToFpuOp.I64ToF32) x))
 
-(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type $I64))))
+(rule 1 (lower (has_type $F64 (fcvt_from_sint x @ (value_type $I64))))
       (int_to_fpu (IntToFpuOp.I64ToF64) x))
 
 ;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 32 _)))))
+(rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 32 _)))))
       (vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
 
-(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
+(rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
       (vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))
 
 (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F32))))
       (fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false out_ty))
 
-(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
+(rule 1 (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
       (fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $I64))
 
 (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F64))))
       (fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false out_ty))
 
-(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
+(rule 1 (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
       (fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $I64))
 
 ;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 32 _)))))
+(rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 32 _)))))
       (vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
 
-(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
+(rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
       (vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))
 
 (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F32))))
       (fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true out_ty))
 
-(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
+(rule 1 (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
       (fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $I64))
 
 (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F64))))
       (fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true out_ty))
 
-(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
+(rule 1 (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
       (fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $I64))
 
 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -519,36 +521,36 @@
 ;; `i64` and smaller
 
 ;; Base case, simply subtracting things in registers.
-(rule (lower (has_type (fits_in_64 ty) (isub x y)))
+(rule -4 (lower (has_type (fits_in_64 ty) (isub x y)))
       (sub ty x y))
 
 ;; Special case for when one operand is an immediate that fits in 12 bits.
-(rule (lower (has_type (fits_in_64 ty) (isub x (imm12_from_value y))))
+(rule 0 (lower (has_type (fits_in_64 ty) (isub x (imm12_from_value y))))
       (sub_imm ty x y))
 
 ;; Same as the previous special case, except we can switch the subtraction to an
 ;; addition if the negated immediate fits in 12 bits.
-(rule (lower (has_type (fits_in_64 ty) (isub x (imm12_from_negated_value y))))
+(rule 2 (lower (has_type (fits_in_64 ty) (isub x (imm12_from_negated_value y))))
       (add_imm ty x y))
 
 ;; Special cases for when we're subtracting an extended register where the
 ;; extending operation can get folded into the sub itself.
-(rule (lower (has_type (fits_in_64 ty) (isub x (extended_value_from_value y))))
+(rule 1 (lower (has_type (fits_in_64 ty) (isub x (extended_value_from_value y))))
       (sub_extend ty x y))
 
 ;; Finally a special case for when we're subtracting the shift of a different
 ;; register by a constant amount and the shift can get folded into the sub.
-(rule (lower (has_type (fits_in_64 ty)
+(rule -3 (lower (has_type (fits_in_64 ty)
                        (isub x (ishl y (iconst k)))))
       (if-let amt (lshl_from_imm64 ty k))
       (sub_shift ty x y amt))
 
 ;; vectors
-(rule (lower (has_type ty @ (multi_lane _ _) (isub x y)))
+(rule -2 (lower (has_type ty @ (multi_lane _ _) (isub x y)))
       (sub_vec x y (vector_size ty)))
 
 ;; `i128`
-(rule (lower (has_type $I128 (isub x y)))
+(rule -1 (lower (has_type $I128 (isub x y)))
       (let
           ;; Get the high/low registers for `x`.
           ((x_regs ValueRegs x)
@@ -588,7 +590,7 @@
 ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; `i64` and smaller.
-(rule (lower (has_type (fits_in_64 ty) (ineg x)))
+(rule 1 (lower (has_type (fits_in_64 ty) (ineg x)))
       (sub ty (zero_reg) x))
 
 ;; vectors.
@@ -598,11 +600,11 @@
 ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; `i64` and smaller.
-(rule (lower (has_type (fits_in_64 ty) (imul x y)))
+(rule -3 (lower (has_type (fits_in_64 ty) (imul x y)))
       (madd ty x y (zero_reg)))
 
 ;; `i128`.
-(rule (lower (has_type $I128 (imul x y)))
+(rule -1 (lower (has_type $I128 (imul x y)))
       (let
           ;; Get the high/low registers for `x`.
           ((x_regs ValueRegs x)
@@ -630,7 +632,7 @@
         (value_regs dst_lo dst_hi)))
 
 ;; Case for i8x16, i16x8, and i32x4.
-(rule (lower (has_type (ty_vec128 ty @ (not_i64x2)) (imul x y)))
+(rule -2 (lower (has_type (ty_vec128 ty @ (not_i64x2)) (imul x y)))
       (mul x y (vector_size ty)))
 
 ;; Special lowering for i64x2.
@@ -662,7 +664,7 @@
 ;;  xtn tmp2.2s, rm.2d
 ;;  shll rd.2d, rd.2s, #32
 ;;  umlal rd.2d, tmp2.2s, tmp1.2s
-(rule (lower (has_type $I64X2 (imul x y)))
+(rule -1 (lower (has_type $I64X2 (imul x y)))
       (let ((rn Reg x)
             (rm Reg y)
             ;; Reverse the 32-bit elements in the 64-bit words.
@@ -774,7 +776,7 @@
 
 ;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type $I64 (smulhi x y)))
+(rule 1 (lower (has_type $I64 (smulhi x y)))
       (smulh $I64 x y))
 
 (rule (lower (has_type (fits_in_32 ty) (smulhi x y)))
@@ -786,7 +788,7 @@
 
 ;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type $I64 (umulhi x y)))
+(rule 1 (lower (has_type $I64 (umulhi x y)))
       (umulh $I64 x y))
 
 (rule (lower (has_type (fits_in_32 ty) (umulhi x y)))
@@ -810,7 +812,7 @@
 
 ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
 (decl put_nonzero_in_reg_zext64 (Value) Reg)
-(rule (put_nonzero_in_reg_zext64 val)
+(rule -1 (put_nonzero_in_reg_zext64 val)
       (trap_if_zero_divisor (put_in_reg_zext64 val)))
 
 ;; Special case where if a `Value` is known to be nonzero we can trivially
@@ -851,12 +853,12 @@
 
 ;; Special case for `sdiv` where no checks are needed due to division by a
 ;; constant meaning the checks are always passed.
-(rule (lower (has_type (fits_in_64 ty) (sdiv x (iconst (safe_divisor_from_imm64 y)))))
+(rule 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst (safe_divisor_from_imm64 y)))))
       (a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty (ImmExtend.Sign) y)))
 
 ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero.
 (decl put_nonzero_in_reg_sext64 (Value) Reg)
-(rule (put_nonzero_in_reg_sext64 val)
+(rule -1 (put_nonzero_in_reg_sext64 val)
       (trap_if_zero_divisor (put_in_reg_sext64 val)))
 
 ;; Note that this has a special case where if the `Value` is a constant that's
@@ -898,51 +900,51 @@
 (rule (lower (has_type ty @ (not_i64x2) (imin x y)))
       (vec_rrr (VecALUOp.Smin) x y (vector_size ty)))
 
-(rule (lower (has_type $I64X2 (imin x y)))
+(rule 1 (lower (has_type $I64X2 (imin x y)))
       (bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) y x (VectorSize.Size64x2)) x y))
 
 (rule (lower (has_type ty @ (not_i64x2) (umin x y)))
       (vec_rrr (VecALUOp.Umin) x y (vector_size ty)))
 
-(rule (lower (has_type $I64X2 (umin x y)))
+(rule 1 (lower (has_type $I64X2 (umin x y)))
       (bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) y x (VectorSize.Size64x2)) x y))
 
 (rule (lower (has_type ty @ (not_i64x2) (imax x y)))
       (vec_rrr (VecALUOp.Smax) x y (vector_size ty)))
 
-(rule (lower (has_type $I64X2 (imax x y)))
+(rule 1 (lower (has_type $I64X2 (imax x y)))
       (bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) x y (VectorSize.Size64x2)) x y))
 
 (rule (lower (has_type ty @ (not_i64x2) (umax x y)))
       (vec_rrr (VecALUOp.Umax) x y (vector_size ty)))
 
-(rule (lower (has_type $I64X2 (umax x y)))
+(rule 1 (lower (has_type $I64X2 (umax x y)))
       (bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) x y (VectorSize.Size64x2)) x y))
 
 ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; General rule for extending input to an output which fits in a single
 ;; register.
-(rule (lower (has_type (fits_in_64 out) (uextend x @ (value_type in))))
+(rule -2 (lower (has_type (fits_in_64 out) (uextend x @ (value_type in))))
       (extend x $false (ty_bits in) (ty_bits out)))
 
 ;; Extraction of a vector lane automatically extends as necessary, so we can
 ;; skip an explicit extending instruction.
-(rule (lower (has_type (fits_in_64 out)
+(rule 1 (lower (has_type (fits_in_64 out)
                        (uextend (extractlane vec @ (value_type in)
                                              (u8_from_uimm8 lane)))))
       (mov_from_vec (put_in_reg vec) lane (lane_size in)))
 
 ;; Atomic loads will also automatically zero their upper bits so the `uextend`
 ;; instruction can effectively get skipped here.
-(rule (lower (has_type (fits_in_64 out)
+(rule 1 (lower (has_type (fits_in_64 out)
                        (uextend x @ (and (value_type in) (atomic_load flags _)))))
       (if-let mem_op (is_sinkable_inst x))
       (load_acquire in flags (sink_atomic_load mem_op)))
 
 ;; Conversion to 128-bit needs a zero-extension of the lower bits and the upper
 ;; bits are all zero.
-(rule (lower (has_type $I128 (uextend x)))
+(rule -1 (lower (has_type $I128 (uextend x)))
       (value_regs (put_in_reg_zext64 x) (imm $I64 (ImmExtend.Zero) 0)))
 
 ;; Like above where vector extraction automatically zero-extends extending to
@@ -956,12 +958,12 @@
 
 ;; General rule for extending input to an output which fits in a single
 ;; register.
-(rule (lower (has_type (fits_in_64 out) (sextend x @ (value_type in))))
+(rule -4 (lower (has_type (fits_in_64 out) (sextend x @ (value_type in))))
       (extend x $true (ty_bits in) (ty_bits out)))
 
 ;; Extraction of a vector lane automatically extends as necessary, so we can
 ;; skip an explicit extending instruction.
-(rule (lower (has_type (fits_in_64 out)
+(rule -3 (lower (has_type (fits_in_64 out)
                        (sextend (extractlane vec @ (value_type in)
                                              (u8_from_uimm8 lane)))))
       (mov_from_vec_signed (put_in_reg vec)
@@ -970,7 +972,7 @@
                            (size_from_ty out)))
 
 ;; 64-bit to 128-bit only needs to sign-extend the input to the upper bits.
-(rule (lower (has_type $I128 (sextend x)))
+(rule -2 (lower (has_type $I128 (sextend x)))
       (let ((lo Reg (put_in_reg_sext64 x))
             (hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63))))
         (value_regs lo hi)))
@@ -991,7 +993,7 @@
         (value_regs lo hi)))
 
 ;; Extension from an extraction of i64x2 into i128.
-(rule (lower (has_type $I128
+(rule -1 (lower (has_type $I128
                        (sextend (extractlane vec @ (value_type $I64X2)
                                              (u8_from_uimm8 lane)))))
       (let ((lo Reg (mov_from_vec (put_in_reg vec)
@@ -1007,12 +1009,12 @@
 ;; Note that bitwise negation is implemented here as
 ;;
 ;;      NOT rd, rm ==> ORR_NOT rd, zero, rm
-(rule (lower (has_type (fits_in_64 ty) (bnot x)))
+(rule -1 (lower (has_type (fits_in_64 ty) (bnot x)))
       (orr_not ty (zero_reg) x))
 
 ;; Special case to use `orr_not_shift` if it's a `bnot` of a const-left-shifted
 ;; value.
-(rule (lower (has_type (fits_in_64 ty)
+(rule 1 (lower (has_type (fits_in_64 ty)
                        (bnot (ishl x (iconst k)))))
       (if-let amt (lshl_from_imm64 ty k))
       (orr_not_shift ty (zero_reg) x amt))
@@ -1027,12 +1029,12 @@
         (value_regs new_lo new_hi)))
 
 ;; Implementation of `bnot` for vector types.
-(rule (lower (has_type (ty_vec128 ty) (bnot x)))
+(rule -2 (lower (has_type (ty_vec128 ty) (bnot x)))
       (not x (vector_size ty)))
 
 ;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (fits_in_32 ty) (band x y)))
+(rule -1 (lower (has_type (fits_in_32 ty) (band x y)))
       (alu_rs_imm_logic_commutative (ALUOp.And) ty x y))
 
 (rule (lower (has_type $I64 (band x y)))
@@ -1040,12 +1042,12 @@
 
 (rule (lower (has_type $I128 (band x y))) (i128_alu_bitop (ALUOp.And) $I64 x y))
 
-(rule (lower (has_type (ty_vec128 ty) (band x y)))
+(rule -2 (lower (has_type (ty_vec128 ty) (band x y)))
       (and_vec x y (vector_size ty)))
 
 ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (fits_in_32 ty) (bor x y)))
+(rule -1 (lower (has_type (fits_in_32 ty) (bor x y)))
       (alu_rs_imm_logic_commutative (ALUOp.Orr) ty x y))
 
 (rule (lower (has_type $I64 (bor x y)))
@@ -1053,12 +1055,12 @@
 
 (rule (lower (has_type $I128 (bor x y))) (i128_alu_bitop (ALUOp.Orr) $I64 x y))
 
-(rule (lower (has_type (ty_vec128 ty) (bor x y)))
+(rule -2 (lower (has_type (ty_vec128 ty) (bor x y)))
       (orr_vec x y (vector_size ty)))
 
 ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (fits_in_32 ty) (bxor x y)))
+(rule -1 (lower (has_type (fits_in_32 ty) (bxor x y)))
       (alu_rs_imm_logic_commutative (ALUOp.Eor) ty x y))
 
 (rule (lower (has_type $I64 (bxor x y)))
@@ -1066,12 +1068,12 @@
 
 (rule (lower (has_type $I128 (bxor x y))) (i128_alu_bitop (ALUOp.Eor) $I64 x y))
 
-(rule (lower (has_type (ty_vec128 ty) (bxor x y)))
+(rule -2 (lower (has_type (ty_vec128 ty) (bxor x y)))
       (eor_vec x y (vector_size ty)))
 
 ;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (fits_in_32 ty) (band_not x y)))
+(rule -1 (lower (has_type (fits_in_32 ty) (band_not x y)))
       (alu_rs_imm_logic (ALUOp.AndNot) ty x y))
 
 (rule (lower (has_type $I64 (band_not x y)))
@@ -1079,12 +1081,12 @@
 
 (rule (lower (has_type $I128 (band_not x y))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y))
 
-(rule (lower (has_type (ty_vec128 ty) (band_not x y)))
+(rule -2 (lower (has_type (ty_vec128 ty) (band_not x y)))
       (bic_vec x y (vector_size ty)))
 
 ;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (fits_in_32 ty) (bor_not x y)))
+(rule -1 (lower (has_type (fits_in_32 ty) (bor_not x y)))
       (alu_rs_imm_logic (ALUOp.OrrNot) ty x y))
 
 (rule (lower (has_type $I64 (bor_not x y)))
@@ -1094,7 +1096,7 @@
 
 ;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (fits_in_32 ty) (bxor_not x y)))
+(rule -1 (lower (has_type (fits_in_32 ty) (bxor_not x y)))
       (alu_rs_imm_logic (ALUOp.EorNot) $I32 x y))
 
 (rule (lower (has_type $I64 (bxor_not x y)))
@@ -1105,7 +1107,7 @@
 ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Shift for i8/i16/i32.
-(rule (lower (has_type (fits_in_32 ty) (ishl x y)))
+(rule -1 (lower (has_type (fits_in_32 ty) (ishl x y)))
       (do_shift (ALUOp.Lsl) ty x y))
 
 ;; Shift for i64.
@@ -1143,7 +1145,7 @@
           (csel (Cond.Ne) lo_lshift maybe_hi)))))
 
 ;; Shift for vector types.
-(rule (lower (has_type (ty_vec128 ty) (ishl x y)))
+(rule -2 (lower (has_type (ty_vec128 ty) (ishl x y)))
       (let ((size VectorSize (vector_size ty))
             (masked_shift_amt Reg (and_imm $I32 y (shift_mask ty)))
             (shift Reg (vec_dup masked_shift_amt size)))
@@ -1166,7 +1168,7 @@
 ;; On i32 and i64 types this matches what the aarch64 spec does, but on smaller
 ;; types (i16, i8) we need to do this manually, so we wrap the shift amount
 ;; with an AND instruction
-(rule (do_shift op (fits_in_16 ty) x y)
+(rule -1 (do_shift op (fits_in_16 ty) x y)
       (let ((shift_amt Reg (value_regs_get y 0))
             (masked_shift_amt Reg (and_imm $I32 shift_amt (shift_mask ty))))
         (alu_rrr op $I32 x masked_shift_amt)))
@@ -1191,7 +1193,7 @@
 ;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Shift for i8/i16/i32.
-(rule (lower (has_type (fits_in_32 ty) (ushr x y)))
+(rule -1 (lower (has_type (fits_in_32 ty) (ushr x y)))
       (do_shift (ALUOp.Lsr) ty (put_in_reg_zext32 x) y))
 
 ;; Shift for i64.
@@ -1203,7 +1205,7 @@
       (lower_ushr128 x (value_regs_get y 0)))
 
 ;; Vector shifts.
-(rule (lower (has_type (ty_vec128 ty) (ushr x y)))
+(rule -2 (lower (has_type (ty_vec128 ty) (ushr x y)))
       (let ((size VectorSize (vector_size ty))
             (masked_shift_amt Reg (and_imm $I32 y (shift_mask ty)))
             (shift Reg (vec_dup (sub $I64 (zero_reg) masked_shift_amt) size)))
@@ -1239,7 +1241,7 @@
 ;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Shift for i8/i16/i32.
-(rule (lower (has_type (fits_in_32 ty) (sshr x y)))
+(rule -2 (lower (has_type (fits_in_32 ty) (sshr x y)))
       (do_shift (ALUOp.Asr) ty (put_in_reg_sext32 x) y))
 
 ;; Shift for i64.
@@ -1253,7 +1255,7 @@
 ;; Vector shifts.
 ;;
 ;; Note that right shifts are implemented with a negative left shift.
-(rule (lower (has_type (ty_vec128 ty) (sshr x y)))
+(rule -1 (lower (has_type (ty_vec128 ty) (sshr x y)))
       (let ((size VectorSize (vector_size ty))
             (masked_shift_amt Reg (and_imm $I32 y (shift_mask ty)))
             (shift Reg (vec_dup (sub $I64 (zero_reg) masked_shift_amt) size)))
@@ -1291,13 +1293,13 @@
 ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; General 8/16-bit case.
-(rule (lower (has_type (fits_in_16 ty) (rotl x y)))
+(rule -2 (lower (has_type (fits_in_16 ty) (rotl x y)))
       (let ((amt Reg (value_regs_get y 0))
             (neg_shift Reg (sub $I32 (zero_reg) amt)))
         (small_rotr ty (put_in_reg_zext32 x) neg_shift)))
 
 ;; Specialization for the 8/16-bit case when the rotation amount is an immediate.
-(rule (lower (has_type (fits_in_16 ty) (rotl x (iconst k))))
+(rule -1 (lower (has_type (fits_in_16 ty) (rotl x (iconst k))))
       (if-let n (imm_shift_from_imm64 ty k))
       (small_rotr_imm ty (put_in_reg_zext32 x) (negate_imm_shift ty n)))
 
@@ -1322,12 +1324,12 @@
         (a64_rotr $I64 x neg_shift)))
 
 ;; Specialization for the 32-bit case when the rotation amount is an immediate.
-(rule (lower (has_type $I32 (rotl x (iconst k))))
+(rule 1 (lower (has_type $I32 (rotl x (iconst k))))
       (if-let n (imm_shift_from_imm64 $I32 k))
       (a64_rotr_imm $I32 x (negate_imm_shift $I32 n)))
 
 ;; Specialization for the 64-bit case when the rotation amount is an immediate.
-(rule (lower (has_type $I64 (rotl x (iconst k))))
+(rule 1 (lower (has_type $I64 (rotl x (iconst k))))
       (if-let n (imm_shift_from_imm64 $I64 k))
       (a64_rotr_imm $I64 x (negate_imm_shift $I64 n)))
 
@@ -1350,19 +1352,19 @@
 ;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; General 8/16-bit case.
-(rule (lower (has_type (fits_in_16 ty) (rotr x y)))
+(rule -3 (lower (has_type (fits_in_16 ty) (rotr x y)))
       (small_rotr ty (put_in_reg_zext32 x) (value_regs_get y 0)))
 
 ;; General 32-bit case.
-(rule (lower (has_type $I32 (rotr x y)))
+(rule -1 (lower (has_type $I32 (rotr x y)))
       (a64_rotr $I32 x (value_regs_get y 0)))
 
 ;; General 64-bit case.
-(rule (lower (has_type $I64 (rotr x y)))
+(rule -1 (lower (has_type $I64 (rotr x y)))
       (a64_rotr $I64 x (value_regs_get y 0)))
 
 ;; Specialization for the 8/16-bit case when the rotation amount is an immediate.
-(rule (lower (has_type (fits_in_16 ty) (rotr x (iconst k))))
+(rule -2 (lower (has_type (fits_in_16 ty) (rotr x (iconst k))))
       (if-let n (imm_shift_from_imm64 ty k))
       (small_rotr_imm ty (put_in_reg_zext32 x) n))
 
@@ -1451,7 +1453,7 @@
             (hi_rev Reg (rbit $I64 (value_regs_get val 1))))
         (value_regs hi_rev lo_rev)))
 
-(rule (lower (has_type ty (bitrev x)))
+(rule -1 (lower (has_type ty (bitrev x)))
       (rbit ty x))
 
 
@@ -1466,7 +1468,7 @@
 (rule (lower (has_type $I128 (clz x)))
       (lower_clz128 x))
 
-(rule (lower (has_type ty (clz x)))
+(rule -1 (lower (has_type ty (clz x)))
       (a64_clz ty x))
 
 ;; clz hi_clz, hi
@@ -1499,7 +1501,7 @@
             (hi Reg (rbit $I64 (value_regs_get val 1))))
         (lower_clz128 (value_regs hi lo))))
 
-(rule (lower (has_type ty (ctz x)))
+(rule -1 (lower (has_type ty (ctz x)))
       (a64_clz ty (rbit ty x)))
 
 ;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1533,7 +1535,7 @@
                            (csel (Cond.Eq) lo_sign_bits (zero_reg)))))
         (value_regs (add $I64 maybe_lo hi_cls) (imm $I64 (ImmExtend.Zero) 0))))
 
-(rule (lower (has_type ty (cls x)))
+(rule -1 (lower (has_type ty (cls x)))
       (a64_cls ty x))
 
 ;;;; Rules for `bint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1541,7 +1543,7 @@
 ;; Booleans are stored as all-zeroes (0) or all-ones (-1). We AND
 ;; out the LSB to give a 0 / 1-valued integer result.
 
-(rule (lower (has_type $I128 (bint x)))
+(rule 1 (lower (has_type $I128 (bint x)))
       (let ((val ValueRegs x)
             (in_lo Reg (value_regs_get val 0))
             (dst_lo Reg (and_imm $I32 in_lo (u64_into_imm_logic $I32 1)))
@@ -1574,21 +1576,21 @@
 (rule (cast_bool $B64 (fits_in_64 _out) x) x)
 
 ;; Casting between 128 bits is a noop
-(rule (cast_bool (ty_int_bool_128 _in) (ty_int_bool_128 _out) x)
+(rule -1 (cast_bool (ty_int_bool_128 _in) (ty_int_bool_128 _out) x)
     x)
 
 ;; Converting from 128 bits to anything below we just ignore the top register
-(rule (cast_bool (ty_int_bool_128 _in) (fits_in_64 _out) x)
+(rule -2 (cast_bool (ty_int_bool_128 _in) (fits_in_64 _out) x)
     (value_regs_get x 0))
 
 ;; Extend to 64 bits first, then this will be all 0s or all 1s and we can
 ;; duplicate to both halves of 128 bits
-(rule (cast_bool in (ty_int_bool_128 _out) x)
+(rule -3 (cast_bool in (ty_int_bool_128 _out) x)
       (let ((tmp Reg (extend x $true (ty_bits in) 64)))
         (value_regs tmp tmp)))
 
 ;; Values that fit in a single register are sign extended normally
-(rule (cast_bool (fits_in_64 in) (fits_in_64 out) x)
+(rule -4 (cast_bool (fits_in_64 in) (fits_in_64 out) x)
       (extend x $true (ty_bits in) (ty_bits out)))
 
 ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1653,7 +1655,7 @@
             (tmp2 Reg (bic ty y c)))
         (orr ty tmp1 tmp2)))
 
-(rule (lower (has_type (ty_vec128 ty) (bitselect c x y)))
+(rule 1 (lower (has_type (ty_vec128 ty) (bitselect c x y)))
         (bsl ty c x y))
 
 ;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1679,62 +1681,62 @@
 
 ;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
+(rule 4 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
       (if (zero_value y))
       (let ((rn Reg x)
             (vec_size VectorSize (vector_size ty)))
           (value_reg (not (fcmeq0 rn vec_size) vec_size))))
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y)))
+(rule 3 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y)))
       (if (zero_value y))
       (let ((rn Reg x)
             (vec_size VectorSize (vector_size ty)))
           (value_reg (float_cmp_zero cond rn vec_size))))
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
+(rule 2 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
       (if (zero_value x))
       (let ((rn Reg y)
             (vec_size VectorSize (vector_size ty)))
           (value_reg (not (fcmeq0 rn vec_size) vec_size))))
 
-(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y)))
+(rule 1 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y)))
       (if (zero_value x))
       (let ((rn Reg y)
             (vec_size VectorSize (vector_size ty)))
           (value_reg (float_cmp_zero_swap cond rn vec_size))))
 
-(rule (lower (has_type out_ty
+(rule 0 (lower (has_type out_ty
               (fcmp cond x @ (value_type (ty_scalar_float in_ty)) y)))
       (with_flags (fpu_cmp (scalar_size in_ty) x y)
                   (materialize_bool_result
                    (ty_bits out_ty)
                    (fp_cond_code cond))))
 
-(rule (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
+(rule -1 (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
       (if (ty_vector_float in_ty))
       (vec_cmp x y in_ty (fp_cond_code cond)))
 
 ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y)))
+(rule 3 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y)))
       (if (zero_value y))
       (let ((rn Reg x)
             (vec_size VectorSize (vector_size ty)))
           (value_reg (not (cmeq0 rn vec_size) vec_size))))
 
-(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y)))
+(rule 2 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y)))
       (if (zero_value y))
       (let ((rn Reg x)
             (vec_size VectorSize (vector_size ty)))
           (value_reg (int_cmp_zero cond rn vec_size))))
 
-(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y)))
+(rule 1 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y)))
       (if (zero_value x))
       (let ((rn Reg y)
             (vec_size VectorSize (vector_size ty)))
           (value_reg (not (cmeq0 rn vec_size) vec_size))))
 
-(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y)))
+(rule 0 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y)))
       (if (zero_value x))
       (let ((rn Reg y)
             (vec_size VectorSize (vector_size ty)))
@@ -1858,17 +1860,17 @@
 (rule (lower (has_type (ty_vec128 _) (vconst (u128_from_constant x))))
       (constant_f128 x))
 
-(rule (lower (has_type ty (vconst (u64_from_constant x))))
+(rule 1 (lower (has_type ty (vconst (u64_from_constant x))))
       (if (ty_vec64 ty))
       (constant_f64 x))
 
 ;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty (splat x @ (value_type in_ty))))
+(rule -1 (lower (has_type ty (splat x @ (value_type in_ty))))
       (if (ty_int_bool_ref_scalar_64 in_ty))
       (vec_dup x (vector_size ty)))
 
-(rule (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
+(rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
       (vec_dup_from_fpu x (vector_size ty)))
 
 (rule (lower (has_type ty (splat (bconst (u64_from_bool n)))))
@@ -1996,30 +1998,30 @@
 
 
 ;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type (ty_vec128_int ty) (snarrow x y)))
+(rule 1 (lower (has_type (ty_vec128_int ty) (snarrow x y)))
       (if (zero_value y))
       (sqxtn x (lane_size ty)))
 
-(rule (lower (has_type (ty_vec64_int ty) (snarrow x y)))
+(rule 2 (lower (has_type (ty_vec64_int ty) (snarrow x y)))
       (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
             (sqxtn dst (lane_size ty))))
 
-(rule (lower (has_type (ty_vec128_int ty) (snarrow x y)))
+(rule 0 (lower (has_type (ty_vec128_int ty) (snarrow x y)))
       (let ((low_half Reg (sqxtn x (lane_size ty)))
             (result Reg (sqxtn2 low_half y (lane_size ty))))
         result))
 
 
 ;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type (ty_vec128_int ty) (unarrow x y)))
+(rule 1 (lower (has_type (ty_vec128_int ty) (unarrow x y)))
       (if (zero_value y))
       (sqxtun x (lane_size ty)))
 
-(rule (lower (has_type (ty_vec64_int ty) (unarrow x y)))
+(rule 2 (lower (has_type (ty_vec64_int ty) (unarrow x y)))
       (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
             (sqxtun dst (lane_size ty))))
 
-(rule (lower (has_type (ty_vec128_int ty) (unarrow x y)))
+(rule 0 (lower (has_type (ty_vec128_int ty) (unarrow x y)))
       (let ((low_half Reg (sqxtun x (lane_size ty)))
             (result Reg (sqxtun2 low_half y (lane_size ty))))
         result))
@@ -2027,15 +2029,15 @@
 
 ;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (ty_vec128_int ty) (uunarrow x y)))
+(rule 1 (lower (has_type (ty_vec128_int ty) (uunarrow x y)))
       (if (zero_value y))
       (uqxtn x (lane_size ty)))
 
-(rule (lower (has_type (ty_vec64_int ty) (uunarrow x y)))
+(rule 2 (lower (has_type (ty_vec64_int ty) (uunarrow x y)))
       (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
             (uqxtn dst (lane_size ty))))
 
-(rule (lower (has_type (ty_vec128_int ty) (uunarrow x y)))
+(rule 0 (lower (has_type (ty_vec128_int ty) (uunarrow x y)))
       (let ((low_half Reg (uqxtn x (lane_size ty)))
             (result Reg (uqxtn2 low_half y (lane_size ty))))
         result))
@@ -2047,7 +2049,7 @@
 
 ;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (ty_vec128 ty) (swiden_high x)))
+(rule 1 (lower (has_type (ty_vec128 ty) (swiden_high x)))
       (vec_extend (VecExtendOp.Sxtl) x $true (lane_size ty)))
 
 (rule (lower (has_type ty (swiden_high x)))
@@ -2062,7 +2064,7 @@
 
 ;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type (ty_vec128 ty) (uwiden_high x)))
+(rule 1 (lower (has_type (ty_vec128 ty) (uwiden_high x)))
       (vec_extend (VecExtendOp.Uxtl) x $true (lane_size ty)))
 
 (rule (lower (has_type ty (uwiden_high x)))
@@ -2167,19 +2169,19 @@
 (rule (lower
        (has_type $I128 (load flags address offset)))
       (aarch64_loadp64 (pair_amode address offset) flags))
-(rule (lower
+(rule -1 (lower
        (has_type (ty_vec64 _)
                         (load flags address offset)))
       (aarch64_fpuload128 (amode $F64 address offset) flags))
-(rule (lower
+(rule -3 (lower
        (has_type (ty_vec128 _)
                         (load flags address offset)))
       (aarch64_fpuload128 (amode $I8X16 address offset) flags))
-(rule (lower
+(rule -2 (lower
        (has_type (ty_dyn_vec64 _)
                         (load flags address offset)))
       (aarch64_fpuload64 (amode $F64 address offset) flags))
-(rule (lower
+(rule -4 (lower
        (has_type (ty_dyn_vec128 _)
                         (load flags address offset)))
       (aarch64_fpuload128 (amode $I8X16 address offset) flags))
@@ -2292,19 +2294,19 @@
                          (value_regs_get value 0)
                          (value_regs_get value 1))))
 
-(rule (lower
+(rule -1 (lower
        (store flags value @ (value_type (ty_vec64 _)) address offset))
       (side_effect
        (aarch64_fpustore64 (amode $F64 address offset) flags value)))
-(rule (lower
+(rule -3 (lower
        (store flags value @ (value_type (ty_vec128 _)) address offset))
       (side_effect
        (aarch64_fpustore128 (amode $I8X16 address offset) flags value)))
-(rule (lower
+(rule -2 (lower
        (store flags value @ (value_type (ty_dyn_vec64 _)) address offset))
       (side_effect
        (aarch64_fpustore64 (amode $F64 address offset) flags value)))
-(rule (lower
+(rule -4 (lower
        (store flags value @ (value_type (ty_dyn_vec128 _)) address offset))
       (side_effect
        (aarch64_fpustore128 (amode $I8X16 address offset) flags value)))
@@ -2320,25 +2322,25 @@
 ;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ; SIMD&FP <=> SIMD&FP
-(rule (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
+(rule 5 (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
       (fpu_move out_ty x))
 
 ; GPR => SIMD&FP
-(rule (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
+(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
       (if (ty_int_bool_ref_scalar_64 in_ty))
       (mov_to_fpu x (scalar_size in_ty)))
 
 ; SIMD&FP => GPR
-(rule (lower (has_type out_ty (bitcast x @ (value_type (fits_in_64 (ty_float_or_vec _))))))
+(rule 3 (lower (has_type out_ty (bitcast x @ (value_type (fits_in_64 (ty_float_or_vec _))))))
       (if (ty_int_bool_ref_scalar_64 out_ty))
       (mov_from_vec x 0 (scalar_size out_ty)))
 
 ; GPR <=> GPR
-(rule (lower (has_type out_ty (bitcast x @ (value_type in_ty))))
+(rule 2 (lower (has_type out_ty (bitcast x @ (value_type in_ty))))
       (if (ty_int_bool_ref_scalar_64 out_ty))
       (if (ty_int_bool_ref_scalar_64 in_ty))
       x)
-(rule (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)
+(rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)
 
 ;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -2349,22 +2351,22 @@
 
 ;; extractlane with lane 0 can pass through the value unchanged; upper
 ;; bits are undefined when a narrower type is in a wider register.
-(rule (lower (has_type (ty_scalar_float _) (extractlane val (u8_from_uimm8 0))))
+(rule 2 (lower (has_type (ty_scalar_float _) (extractlane val (u8_from_uimm8 0))))
       val)
 
-(rule (lower (has_type (ty_int_bool ty)
+(rule 0 (lower (has_type (ty_int_bool ty)
                        (extractlane val
                                     (u8_from_uimm8 lane))))
       (mov_from_vec val lane (scalar_size ty)))
 
-(rule (lower (has_type (ty_scalar_float ty)
+(rule 1 (lower (has_type (ty_scalar_float ty)
                        (extractlane val @ (value_type vty)
                                     (u8_from_uimm8 lane))))
       (fpu_move_from_vec val lane (vector_size vty)))
 
 ;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (insertlane vec @ (value_type vty)
+(rule 1 (lower (insertlane vec @ (value_type vty)
                          val @ (value_type (ty_int_bool _))
                          (u8_from_uimm8 lane)))
       (mov_to_vec vec val lane (vector_size vty)))
@@ -2580,7 +2582,7 @@
                                 (cond_br taken not_taken
                                  (cond_br_cond cond))))))
 ;; standard `brz`
-(rule (lower_branch (brz c @ (value_type $I128) _ _) targets)
+(rule -1 (lower_branch (brz c @ (value_type $I128) _ _) targets)
       (let ((flags ProducesFlags (flags_to_producesflags c))
             (c ValueRegs (put_in_regs c))
             (c_lo Reg (value_regs_get c 0))
@@ -2591,7 +2593,7 @@
        (side_effect
         (with_flags_side_effect flags
          (cond_br taken not_taken (cond_br_zero rt))))))
-(rule (lower_branch (brz c @ (value_type ty) _ _) targets)
+(rule -2 (lower_branch (brz c @ (value_type ty) _ _) targets)
       (if (ty_int_bool_ref_scalar_64 ty))
       (let ((flags ProducesFlags (flags_to_producesflags c))
             (rt Reg (put_in_reg_zext64 c))
@@ -2601,7 +2603,7 @@
         (with_flags_side_effect flags
          (cond_br taken not_taken (cond_br_zero rt))))))
 ;; standard `brnz`
-(rule (lower_branch (brnz c @ (value_type $I128) _ _) targets)
+(rule -1 (lower_branch (brnz c @ (value_type $I128) _ _) targets)
       (let ((flags ProducesFlags (flags_to_producesflags c))
             (c ValueRegs (put_in_regs c))
             (c_lo Reg (value_regs_get c 0))
@@ -2612,7 +2614,7 @@
        (side_effect
         (with_flags_side_effect flags
          (cond_br taken not_taken (cond_br_not_zero rt))))))
-(rule (lower_branch (brnz c @ (value_type ty) _ _) targets)
+(rule -2 (lower_branch (brnz c @ (value_type ty) _ _) targets)
       (if (ty_int_bool_ref_scalar_64 ty))
       (let ((flags ProducesFlags (flags_to_producesflags c))
             (rt Reg (put_in_reg_zext64 c))
diff --git a/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle b/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
index 71d1472e72..e97c398d2f 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
@@ -1,90 +1,90 @@
 
 ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (iadd x y)))
+(rule -4 (lower (has_type ty @ (dynamic_lane _ _) (iadd x y))) ; priority -4: ranked below the default (0), so any higher-priority matching `lower` rule fires first
       (value_reg (add_vec (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (isub x y)))
+(rule -5 (lower (has_type ty @ (dynamic_lane _ _) (isub x y))) ; priority -5: ranked below the default (0), so any higher-priority matching `lower` rule fires first
       (value_reg (sub_vec (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type (lane_fits_in_32 ty @ (dynamic_lane _ _)) (imul x y)))
+(rule -4 (lower (has_type (lane_fits_in_32 ty @ (dynamic_lane _ _)) (imul x y))) ; priority -4: ranked below the default (0), so any higher-priority matching `lower` rule fires first
       (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (fadd x y)))
+(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fadd x y))) ; priority -2: ranked below the default (0)
       (value_reg (vec_rrr (VecALUOp.Fadd) (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (fsub x y)))
+(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fsub x y))) ; priority -2: ranked below the default (0)
       (value_reg (vec_rrr (VecALUOp.Fsub) (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (fmul x y)))
+(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fmul x y))) ; priority -2: ranked below the default (0)
       (value_reg (vec_rrr (VecALUOp.Fmul) (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (fdiv x y)))
+(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fdiv x y))) ; priority -2: ranked below the default (0)
       (value_reg (vec_rrr (VecALUOp.Fdiv) (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (fmin x y)))
+(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fmin x y))) ; priority -2: ranked below the default (0)
       (value_reg (vec_rrr (VecALUOp.Fmin) (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (fmax x y)))
+(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fmax x y))) ; priority -2: ranked below the default (0)
       (value_reg (vec_rrr (VecALUOp.Fmax) (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
 ;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (fmin_pseudo x y)))
+(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fmin_pseudo x y))) ; priority -2: ranked below the default (0); mask below is Fcmgt(x, y)
       (value_reg (bsl ty
                   (vec_rrr (VecALUOp.Fcmgt) (put_in_reg x) (put_in_reg y)
                    (vector_size ty)) (put_in_reg y) (put_in_reg x))))
 
 ;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty @ (dynamic_lane _ _) (fmax_pseudo x y)))
+(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fmax_pseudo x y))) ; priority -2: ranked below the default (0); mask below is Fcmgt(y, x), operands swapped vs. fmin_pseudo
       (value_reg (bsl ty
                   (vec_rrr (VecALUOp.Fcmgt) (put_in_reg y) (put_in_reg x)
                    (vector_size ty)) (put_in_reg y) (put_in_reg x))))
 
 ;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type (ty_dyn128_int ty) (snarrow x y)))
+(rule -2 (lower (has_type (ty_dyn128_int ty) (snarrow x y))) ; priority -2: `zero_value y` case, tried before the unguarded `ty_dyn128_int` rule (-3)
       (if-let _ (zero_value y))
       (sqxtn x (lane_size ty)))
 
-(rule (lower (has_type (ty_dyn64_int ty) (snarrow x y)))
+(rule -1 (lower (has_type (ty_dyn64_int ty) (snarrow x y))) ; priority -1: `ty_dyn64_int` case, highest of these three snarrow rules
       (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
             (sqxtn dst (lane_size ty))))
 
-(rule (lower (has_type (ty_dyn128_int ty) (snarrow x y)))
+(rule -3 (lower (has_type (ty_dyn128_int ty) (snarrow x y))) ; priority -3: `ty_dyn128_int` fallback without the `zero_value` guard
       (let ((low_half Reg (sqxtn x (lane_size ty)))
             (result Reg (sqxtn2 low_half y (lane_size ty))))
         result))
 
 ;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type (ty_dyn128_int ty) (unarrow x y)))
+(rule -2 (lower (has_type (ty_dyn128_int ty) (unarrow x y))) ; priority -2: `zero_value y` case, tried before the unguarded `ty_dyn128_int` rule (-3)
       (if-let _ (zero_value y))
       (sqxtun x (lane_size ty)))
 
-(rule (lower (has_type (ty_dyn64_int ty) (unarrow x y)))
+(rule -1 (lower (has_type (ty_dyn64_int ty) (unarrow x y))) ; priority -1: `ty_dyn64_int` case, highest of these three unarrow rules
       (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
             (sqxtun dst (lane_size ty))))
 
-(rule (lower (has_type (ty_dyn128_int ty) (unarrow x y)))
+(rule -3 (lower (has_type (ty_dyn128_int ty) (unarrow x y))) ; priority -3: `ty_dyn128_int` fallback without the `zero_value` guard
       (let ((low_half Reg (sqxtun x (lane_size ty)))
             (result Reg (sqxtun2 low_half y (lane_size ty))))
         result))
 
 ;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type (ty_dyn128_int ty) (uunarrow x y)))
+(rule -2 (lower (has_type (ty_dyn128_int ty) (uunarrow x y))) ; priority -2: `zero_value y` case, tried before the unguarded `ty_dyn128_int` rule (-3)
       (if-let _ (zero_value y))
       (uqxtn x (lane_size ty)))
 
-(rule (lower (has_type (ty_dyn64_int ty) (uunarrow x y)))
+(rule -1 (lower (has_type (ty_dyn64_int ty) (uunarrow x y))) ; priority -1: `ty_dyn64_int` case, highest of these three uunarrow rules
       (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
             (uqxtn dst (lane_size ty))))
 
-(rule (lower (has_type (ty_dyn128_int ty) (uunarrow x y)))
+(rule -3 (lower (has_type (ty_dyn128_int ty) (uunarrow x y))) ; priority -3: `ty_dyn128_int` fallback without the `zero_value` guard
       (let ((low_half Reg (uqxtn x (lane_size ty)))
             (result Reg (uqxtn2 low_half y (lane_size ty))))
         result))
@@ -101,20 +101,20 @@
 
 ;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty (swiden_low x)))
+(rule -1 (lower (has_type ty (swiden_low x))) ; priority -1: `ty` is unconstrained, so this fires only when no higher-priority rule matches
       (value_reg (vec_extend (VecExtendOp.Sxtl) x $false (lane_size ty))))
 
 ;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty (swiden_high x)))
+(rule -1 (lower (has_type ty (swiden_high x))) ; priority -1: `ty` is unconstrained, so this fires only when no higher-priority rule matches
       (value_reg (vec_extend (VecExtendOp.Sxtl) x $true (lane_size ty))))
 
 ;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty (uwiden_low x)))
+(rule -1 (lower (has_type ty (uwiden_low x))) ; priority -1: `ty` is unconstrained, so this fires only when no higher-priority rule matches
       (value_reg (vec_extend (VecExtendOp.Uxtl) x $false (lane_size ty))))
 
 ;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (has_type ty (uwiden_high x)))
+(rule -1 (lower (has_type ty (uwiden_high x))) ; priority -1: `ty` is unconstrained, so this fires only when no higher-priority rule matches
       (value_reg (vec_extend (VecExtendOp.Uxtl) x $true (lane_size ty))))