[AArch64] Port SIMD narrowing to ISLE (#4478)
* [AArch64] Port SIMD narrowing to ISLE

  Port Fvdemote, snarrow, unarrow and uunarrow. Also refactor the AArch64
  instruction descriptions to parameterize on ScalarSize instead of using
  different opcodes. The zero_value pure constructor has been introduced and
  used by the integer narrow operations; it replaces, and extends, the
  compare-zero patterns.

  Copyright (c) 2022, Arm Limited.

* use short 'if' patterns
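To illustrate the refactor described in the message above, here is a hypothetical sketch (not part of the patch) of how a narrowing instruction is now described: the lane width moves out of the opcode and into a separate ScalarSize field, so a single Sqxtn variant covers what were previously Sqxtn16, Sqxtn32 and Sqxtn64.

// Sketch only; field names follow the diff below, and `writable_vreg`/`vreg`
// are the helpers used in the emission tests further down.
let narrow = Inst::VecRRNarrow {
    op: VecRRNarrowOp::Sqxtn,      // was VecRRNarrowOp::Sqxtn64
    rd: writable_vreg(0),
    rn: vreg(1),
    high_half: false,              // `true` selects the `sqxtn2` form
    lane_size: ScalarSize::Size32, // lane size of the narrowed destination
};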
@@ -531,7 +531,8 @@
(op VecRRNarrowOp)
(rd WritableReg)
(rn Reg)
(high_half bool))
(high_half bool)
(lane_size ScalarSize))

;; 1-operand vector instruction that operates on a pair of elements.
(VecRRPair
@@ -905,6 +906,17 @@
(rule (scalar_size $F32) (ScalarSize.Size32))
(rule (scalar_size $F64) (ScalarSize.Size64))

;; Helper for calculating the `ScalarSize` lane type from vector type
(decl lane_size (Type) ScalarSize)
(rule (lane_size (multi_lane 8 _)) (ScalarSize.Size8))
(rule (lane_size (multi_lane 16 _)) (ScalarSize.Size16))
(rule (lane_size (multi_lane 32 _)) (ScalarSize.Size32))
(rule (lane_size (multi_lane 64 _)) (ScalarSize.Size64))
(rule (lane_size (dynamic_lane 8 _)) (ScalarSize.Size8))
(rule (lane_size (dynamic_lane 16 _)) (ScalarSize.Size16))
(rule (lane_size (dynamic_lane 32 _)) (ScalarSize.Size32))
(rule (lane_size (dynamic_lane 64 _)) (ScalarSize.Size64))

(type Cond extern
(enum
(Eq)
@@ -936,17 +948,6 @@
(Size64x2)
))

(type DynamicVectorSize extern
(enum
(Size8x8xN)
(Size8x16xN)
(Size16x4xN)
(Size16x8xN)
(Size32x2xN)
(Size32x4xN)
(Size64x2xN)
))

;; Helper for calculating the `VectorSize` corresponding to a type
(decl vector_size (Type) VectorSize)
(rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
@@ -1203,34 +1204,16 @@
;; A vector narrowing operation with one argument.
(type VecRRNarrowOp
(enum
;; Extract narrow, 16-bit elements
(Xtn16)
;; Extract narrow, 32-bit elements
(Xtn32)
;; Extract narrow, 64-bit elements
(Xtn64)
;; Signed saturating extract narrow, 16-bit elements
(Sqxtn16)
;; Signed saturating extract narrow, 32-bit elements
(Sqxtn32)
;; Signed saturating extract narrow, 64-bit elements
(Sqxtn64)
;; Signed saturating extract unsigned narrow, 16-bit elements
(Sqxtun16)
;; Signed saturating extract unsigned narrow, 32-bit elements
(Sqxtun32)
;; Signed saturating extract unsigned narrow, 64-bit elements
(Sqxtun64)
;; Unsigned saturating extract narrow, 16-bit elements
(Uqxtn16)
;; Unsigned saturating extract narrow, 32-bit elements
(Uqxtn32)
;; Unsigned saturating extract narrow, 64-bit elements
(Uqxtn64)
;; Floating-point convert to lower precision narrow, 32-bit elements
(Fcvtn32)
;; Floating-point convert to lower precision narrow, 64-bit elements
(Fcvtn64)
;; Extract narrow.
(Xtn)
;; Signed saturating extract narrow.
(Sqxtn)
;; Signed saturating extract unsigned narrow.
(Sqxtun)
;; Unsigned saturating extract narrow.
(Uqxtn)
;; Floating-point convert to lower precision narrow.
(Fcvtn)
))

(type VecRRRLongOp
@@ -1623,10 +1606,19 @@
dst))

;; Helper for emitting `MInst.VecRRNarrow` instructions.
(decl vec_rr_narrow (VecRRNarrowOp Reg bool) Reg)
(rule (vec_rr_narrow op src high_half)
(decl vec_rr_narrow (VecRRNarrowOp Reg ScalarSize) Reg)
(rule (vec_rr_narrow op src size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecRRNarrow op dst src high_half))))
(_ Unit (emit (MInst.VecRRNarrow op dst src $false size))))
dst))

;; Helper for emitting `MInst.VecRRNarrow` instructions which update the
;; high half of the destination register.
(decl vec_rr_narrow_high (VecRRNarrowOp Reg Reg ScalarSize) Reg)
(rule (vec_rr_narrow_high op mod src size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_1 Unit (emit (MInst.FpuMove128 dst mod)))
(_2 Unit (emit (MInst.VecRRNarrow op dst src $true size))))
dst))

;; Helper for emitting `MInst.VecRRLong` instructions.
@@ -1673,6 +1665,14 @@
(_2 Unit (emit (MInst.MovToVec dst src2 lane size))))
dst))

;; Helper for emitting `MInst.VecMovElement` instructions.
(decl mov_vec_elem (Reg Reg u8 u8 VectorSize) Reg)
(rule (mov_vec_elem src1 src2 dst_idx src_idx size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_1 Unit (emit (MInst.FpuMove128 dst src1)))
(_2 Unit (emit (MInst.VecMovElement dst src2 dst_idx src_idx size))))
dst))

;; Helper for emitting `MInst.MovFromVec` instructions.
(decl mov_from_vec (Reg u8 VectorSize) Reg)
(rule (mov_from_vec rn idx size)
@@ -1830,9 +1830,37 @@
(decl rev64 (Reg VectorSize) Reg)
(rule (rev64 x size) (vec_misc (VecMisc2.Rev64) x size))

;; Helper for generating `xtn64` instructions.
(decl xtn64 (Reg bool) Reg)
(rule (xtn64 x high_half) (vec_rr_narrow (VecRRNarrowOp.Xtn64) x high_half))
;; Helper for generating `xtn` instructions.
(decl xtn (Reg ScalarSize) Reg)
(rule (xtn x size) (vec_rr_narrow (VecRRNarrowOp.Xtn) x size))

;; Helper for generating `fcvtn` instructions.
(decl fcvtn (Reg ScalarSize) Reg)
(rule (fcvtn x size) (vec_rr_narrow (VecRRNarrowOp.Fcvtn) x size))

;; Helper for generating `sqxtn` instructions.
(decl sqxtn (Reg ScalarSize) Reg)
(rule (sqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtn) x size))

;; Helper for generating `sqxtn2` instructions.
(decl sqxtn2 (Reg Reg ScalarSize) Reg)
(rule (sqxtn2 x y size) (vec_rr_narrow_high (VecRRNarrowOp.Sqxtn) x y size))

;; Helper for generating `sqxtun` instructions.
(decl sqxtun (Reg ScalarSize) Reg)
(rule (sqxtun x size) (vec_rr_narrow (VecRRNarrowOp.Sqxtun) x size))

;; Helper for generating `sqxtun2` instructions.
(decl sqxtun2 (Reg Reg ScalarSize) Reg)
(rule (sqxtun2 x y size) (vec_rr_narrow_high (VecRRNarrowOp.Sqxtun) x y size))

;; Helper for generating `uqxtn` instructions.
(decl uqxtn (Reg ScalarSize) Reg)
(rule (uqxtn x size) (vec_rr_narrow (VecRRNarrowOp.Uqxtn) x size))

;; Helper for generating `uqxtn2` instructions.
(decl uqxtn2 (Reg Reg ScalarSize) Reg)
(rule (uqxtn2 x y size) (vec_rr_narrow_high (VecRRNarrowOp.Uqxtn) x y size))

;; Helper for generating `addp` instructions.
(decl addp (Reg Reg VectorSize) Reg)
@@ -2202,16 +2230,6 @@
(alu_rrr op ty x_lo y_lo)
(alu_rrr op ty x_hi y_hi))))

;; Float vector compare helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Match 32 bit float 0 value
(decl zero_value_f32 (Ieee32) Ieee32)
(extern extractor zero_value_f32 zero_value_f32)

;; Match 64 bit float 0 value
(decl zero_value_f64 (Ieee64) Ieee64)
(extern extractor zero_value_f64 zero_value_f64)

;; Generate comparison to zero operator from input condition code
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
@@ -2242,12 +2260,6 @@
(rule (fcmeq0 rn size)
(vec_misc (VecMisc2.Fcmeq0) rn size))

;; Int vector compare helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Match integer 0 value
(decl zero_value (Imm64) Imm64)
(extern extractor zero_value zero_value)

;; Generate comparison to zero operator from input condition code
(decl int_cc_cmp_zero_to_vec_misc_op (IntCC) VecMisc2)
(extern constructor int_cc_cmp_zero_to_vec_misc_op int_cc_cmp_zero_to_vec_misc_op)
@@ -643,6 +643,16 @@ impl ScalarSize {
            _ => panic!("Unexpected scalar FP operand size: {:?}", self),
        }
    }

    pub fn widen(&self) -> ScalarSize {
        match self {
            ScalarSize::Size8 => ScalarSize::Size16,
            ScalarSize::Size16 => ScalarSize::Size32,
            ScalarSize::Size32 => ScalarSize::Size64,
            ScalarSize::Size64 => ScalarSize::Size128,
            ScalarSize::Size128 => panic!("can't widen 128-bits"),
        }
    }
}

/// Type used to communicate the size of a vector operand.
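A minimal illustration of the new helper (a sketch, not part of the patch): each size widens to the next one up, and widening Size128 panics because there is no wider scalar size.

// Sketch of the expected behaviour of ScalarSize::widen().
assert_eq!(ScalarSize::Size8.widen(), ScalarSize::Size16);
assert_eq!(ScalarSize::Size32.widen(), ScalarSize::Size64);
// ScalarSize::Size128.widen() panics.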
@@ -2252,15 +2252,17 @@ impl MachInstEmit for Inst {
&Inst::VecDup { rd, rn, size } => {
    let rd = allocs.next_writable(rd);
    let rn = allocs.next(rn);
    let imm5 = match size {
        VectorSize::Size8x16 => 0b00001,
        VectorSize::Size16x8 => 0b00010,
        VectorSize::Size32x4 => 0b00100,
        VectorSize::Size64x2 => 0b01000,
    let q = size.is_128bits() as u32;
    let imm5 = match size.lane_size() {
        ScalarSize::Size8 => 0b00001,
        ScalarSize::Size16 => 0b00010,
        ScalarSize::Size32 => 0b00100,
        ScalarSize::Size64 => 0b01000,
        _ => unimplemented!("Unexpected VectorSize: {:?}", size),
    };
    sink.put4(
        0b010_01110000_00000_000011_00000_00000
        0b000_01110000_00000_000011_00000_00000
            | (q << 30)
            | (imm5 << 16)
            | (machreg_to_gpr(rn) << 5)
            | machreg_to_vec(rd.to_reg()),
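As a quick sanity check on the reworked encoding (a sketch, not part of the patch), reproducing the `dup v24.8b, w8` case from the tests further down, whose expected bytes 180D010E correspond to the word 0x0E010D18:

// q = 0 (64-bit form), imm5 = 0b00001 (8-bit lanes), rn = w8, rd = v24.
let word: u32 = 0b000_01110000_00000_000011_00000_00000
    | (0 << 30)        // q
    | (0b00001 << 16)  // imm5
    | (8 << 5)         // machreg_to_gpr(rn)
    | 24;              // machreg_to_vec(rd.to_reg())
assert_eq!(word, 0x0E010D18);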
@@ -2395,24 +2397,30 @@
    rd,
    rn,
    high_half,
    lane_size,
} => {
    let rn = allocs.next(rn);
    let rd = allocs.next_writable(rd);
    let (u, size, bits_12_16) = match op {
        VecRRNarrowOp::Xtn16 => (0b0, 0b00, 0b10010),
        VecRRNarrowOp::Xtn32 => (0b0, 0b01, 0b10010),
        VecRRNarrowOp::Xtn64 => (0b0, 0b10, 0b10010),
        VecRRNarrowOp::Sqxtn16 => (0b0, 0b00, 0b10100),
        VecRRNarrowOp::Sqxtn32 => (0b0, 0b01, 0b10100),
        VecRRNarrowOp::Sqxtn64 => (0b0, 0b10, 0b10100),
        VecRRNarrowOp::Sqxtun16 => (0b1, 0b00, 0b10010),
        VecRRNarrowOp::Sqxtun32 => (0b1, 0b01, 0b10010),
        VecRRNarrowOp::Sqxtun64 => (0b1, 0b10, 0b10010),
        VecRRNarrowOp::Uqxtn16 => (0b1, 0b00, 0b10100),
        VecRRNarrowOp::Uqxtn32 => (0b1, 0b01, 0b10100),
        VecRRNarrowOp::Uqxtn64 => (0b1, 0b10, 0b10100),
        VecRRNarrowOp::Fcvtn32 => (0b0, 0b00, 0b10110),
        VecRRNarrowOp::Fcvtn64 => (0b0, 0b01, 0b10110),

    let size = match lane_size {
        ScalarSize::Size8 => 0b00,
        ScalarSize::Size16 => 0b01,
        ScalarSize::Size32 => 0b10,
        _ => panic!("unsupported size: {:?}", lane_size),
    };

    // Floats use a single bit, to encode either half or single.
    let size = match op {
        VecRRNarrowOp::Fcvtn => size >> 1,
        _ => size,
    };

    let (u, bits_12_16) = match op {
        VecRRNarrowOp::Xtn => (0b0, 0b10010),
        VecRRNarrowOp::Sqxtn => (0b0, 0b10100),
        VecRRNarrowOp::Sqxtun => (0b1, 0b10010),
        VecRRNarrowOp::Uqxtn => (0b1, 0b10100),
        VecRRNarrowOp::Fcvtn => (0b0, 0b10110),
    };

    sink.put4(enc_vec_rr_misc(
@@ -2338,6 +2338,15 @@ fn test_aarch64_binemit() {
    "1B423BD5",
    "mrs x27, nzcv",
));
insns.push((
    Inst::VecDup {
        rd: writable_vreg(24),
        rn: xreg(8),
        size: VectorSize::Size8x8,
    },
    "180D010E",
    "dup v24.8b, w8",
));
insns.push((
    Inst::VecDup {
        rd: writable_vreg(25),
@@ -2347,6 +2356,15 @@ fn test_aarch64_binemit() {
    "F90C014E",
    "dup v25.16b, w7",
));
insns.push((
    Inst::VecDup {
        rd: writable_vreg(1),
        rn: xreg(22),
        size: VectorSize::Size16x4,
    },
    "C10E020E",
    "dup v1.4h, w22",
));
insns.push((
    Inst::VecDup {
        rd: writable_vreg(2),
@@ -2356,6 +2374,15 @@ fn test_aarch64_binemit() {
    "E20E024E",
    "dup v2.8h, w23",
));
insns.push((
    Inst::VecDup {
        rd: writable_vreg(30),
        rn: xreg(28),
        size: VectorSize::Size32x2,
    },
    "9E0F040E",
    "dup v30.2s, w28",
));
insns.push((
    Inst::VecDup {
        rd: writable_vreg(0),
@@ -2652,10 +2679,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Xtn16,
        op: VecRRNarrowOp::Xtn,
        rd: writable_vreg(25),
        rn: vreg(17),
        high_half: false,
        lane_size: ScalarSize::Size8,
    },
    "392A210E",
    "xtn v25.8b, v17.8h",
@@ -2663,10 +2691,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Xtn32,
        op: VecRRNarrowOp::Xtn,
        rd: writable_vreg(3),
        rn: vreg(10),
        high_half: true,
        lane_size: ScalarSize::Size16,
    },
    "4329614E",
    "xtn2 v3.8h, v10.4s",
@@ -2674,10 +2703,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Xtn64,
        op: VecRRNarrowOp::Xtn,
        rd: writable_vreg(22),
        rn: vreg(8),
        high_half: false,
        lane_size: ScalarSize::Size32,
    },
    "1629A10E",
    "xtn v22.2s, v8.2d",
@@ -2685,10 +2715,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Sqxtn16,
        op: VecRRNarrowOp::Sqxtn,
        rd: writable_vreg(7),
        rn: vreg(22),
        high_half: true,
        lane_size: ScalarSize::Size8,
    },
    "C74A214E",
    "sqxtn2 v7.16b, v22.8h",
@@ -2696,10 +2727,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Sqxtn32,
        op: VecRRNarrowOp::Sqxtn,
        rd: writable_vreg(31),
        rn: vreg(0),
        high_half: true,
        lane_size: ScalarSize::Size16,
    },
    "1F48614E",
    "sqxtn2 v31.8h, v0.4s",
@@ -2707,10 +2739,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Sqxtn64,
        op: VecRRNarrowOp::Sqxtn,
        rd: writable_vreg(14),
        rn: vreg(20),
        high_half: false,
        lane_size: ScalarSize::Size32,
    },
    "8E4AA10E",
    "sqxtn v14.2s, v20.2d",
@@ -2718,10 +2751,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Sqxtun16,
        op: VecRRNarrowOp::Sqxtun,
        rd: writable_vreg(16),
        rn: vreg(23),
        high_half: false,
        lane_size: ScalarSize::Size8,
    },
    "F02A212E",
    "sqxtun v16.8b, v23.8h",
@@ -2729,10 +2763,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Sqxtun32,
        op: VecRRNarrowOp::Sqxtun,
        rd: writable_vreg(28),
        rn: vreg(9),
        high_half: true,
        lane_size: ScalarSize::Size16,
    },
    "3C29616E",
    "sqxtun2 v28.8h, v9.4s",
@@ -2740,10 +2775,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Sqxtun64,
        op: VecRRNarrowOp::Sqxtun,
        rd: writable_vreg(15),
        rn: vreg(15),
        high_half: false,
        lane_size: ScalarSize::Size32,
    },
    "EF29A12E",
    "sqxtun v15.2s, v15.2d",
@@ -2751,10 +2787,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Uqxtn16,
        op: VecRRNarrowOp::Uqxtn,
        rd: writable_vreg(21),
        rn: vreg(4),
        high_half: true,
        lane_size: ScalarSize::Size8,
    },
    "9548216E",
    "uqxtn2 v21.16b, v4.8h",
@@ -2762,10 +2799,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Uqxtn32,
        op: VecRRNarrowOp::Uqxtn,
        rd: writable_vreg(31),
        rn: vreg(31),
        high_half: false,
        lane_size: ScalarSize::Size16,
    },
    "FF4B612E",
    "uqxtn v31.4h, v31.4s",
@@ -2773,10 +2811,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Uqxtn64,
        op: VecRRNarrowOp::Uqxtn,
        rd: writable_vreg(11),
        rn: vreg(12),
        high_half: true,
        lane_size: ScalarSize::Size32,
    },
    "8B49A16E",
    "uqxtn2 v11.4s, v12.2d",
@@ -2784,10 +2823,11 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Fcvtn32,
        op: VecRRNarrowOp::Fcvtn,
        rd: writable_vreg(0),
        rn: vreg(0),
        high_half: false,
        lane_size: ScalarSize::Size16,
    },
    "0068210E",
    "fcvtn v0.4h, v0.4s",
@@ -2795,10 +2835,23 @@ fn test_aarch64_binemit() {

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Fcvtn64,
        op: VecRRNarrowOp::Fcvtn,
        rd: writable_vreg(2),
        rn: vreg(7),
        high_half: false,
        lane_size: ScalarSize::Size32,
    },
    "E268610E",
    "fcvtn v2.2s, v7.2d",
));

insns.push((
    Inst::VecRRNarrow {
        op: VecRRNarrowOp::Fcvtn,
        rd: writable_vreg(31),
        rn: vreg(30),
        high_half: true,
        lane_size: ScalarSize::Size32,
    },
    "DF6B614E",
    "fcvtn2 v31.4s, v30.2d",
@@ -2124,94 +2124,24 @@ impl Inst {
    rd,
    rn,
    high_half,
    lane_size,
} => {
    let (op, rd_size, size) = match (op, high_half) {
        (VecRRNarrowOp::Xtn16, false) => {
            ("xtn", VectorSize::Size8x8, VectorSize::Size16x8)
        }
        (VecRRNarrowOp::Xtn16, true) => {
            ("xtn2", VectorSize::Size8x16, VectorSize::Size16x8)
        }
        (VecRRNarrowOp::Xtn32, false) => {
            ("xtn", VectorSize::Size16x4, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Xtn32, true) => {
            ("xtn2", VectorSize::Size16x8, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Xtn64, false) => {
            ("xtn", VectorSize::Size32x2, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Xtn64, true) => {
            ("xtn2", VectorSize::Size32x4, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Sqxtn16, false) => {
            ("sqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
        }
        (VecRRNarrowOp::Sqxtn16, true) => {
            ("sqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
        }
        (VecRRNarrowOp::Sqxtn32, false) => {
            ("sqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Sqxtn32, true) => {
            ("sqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Sqxtn64, false) => {
            ("sqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Sqxtn64, true) => {
            ("sqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Sqxtun16, false) => {
            ("sqxtun", VectorSize::Size8x8, VectorSize::Size16x8)
        }
        (VecRRNarrowOp::Sqxtun16, true) => {
            ("sqxtun2", VectorSize::Size8x16, VectorSize::Size16x8)
        }
        (VecRRNarrowOp::Sqxtun32, false) => {
            ("sqxtun", VectorSize::Size16x4, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Sqxtun32, true) => {
            ("sqxtun2", VectorSize::Size16x8, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Sqxtun64, false) => {
            ("sqxtun", VectorSize::Size32x2, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Sqxtun64, true) => {
            ("sqxtun2", VectorSize::Size32x4, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Uqxtn16, false) => {
            ("uqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
        }
        (VecRRNarrowOp::Uqxtn16, true) => {
            ("uqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
        }
        (VecRRNarrowOp::Uqxtn32, false) => {
            ("uqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Uqxtn32, true) => {
            ("uqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Uqxtn64, false) => {
            ("uqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Uqxtn64, true) => {
            ("uqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Fcvtn32, false) => {
            ("fcvtn", VectorSize::Size16x4, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Fcvtn32, true) => {
            ("fcvtn2", VectorSize::Size16x8, VectorSize::Size32x4)
        }
        (VecRRNarrowOp::Fcvtn64, false) => {
            ("fcvtn", VectorSize::Size32x2, VectorSize::Size64x2)
        }
        (VecRRNarrowOp::Fcvtn64, true) => {
            ("fcvtn2", VectorSize::Size32x4, VectorSize::Size64x2)
        }
    let vec64 = VectorSize::from_lane_size(lane_size, false);
    let vec128 = VectorSize::from_lane_size(lane_size, true);
    let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);
    let (op, rd_size) = match (op, high_half) {
        (VecRRNarrowOp::Xtn, false) => ("xtn", vec64),
        (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),
        (VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64),
        (VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128),
        (VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64),
        (VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128),
        (VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64),
        (VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128),
        (VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64),
        (VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128),
    };
    let rn = pretty_print_vreg_vector(rn, size, allocs);
    let rn = pretty_print_vreg_vector(rn, rn_size, allocs);
    let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs);

    format!("{} {}, {}", op, rd, rn)
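A worked example of the simplified pretty-printing (a sketch; values taken from the `sqxtn2 v31.8h, v0.4s` binemit test above):

// op = Sqxtn, high_half = true, lane_size = ScalarSize::Size16:
//   rd_size = VectorSize::from_lane_size(Size16, true)          // 16x8 => ".8h"
//   rn_size = VectorSize::from_lane_size(Size16.widen(), true)  // 32x4 => ".4s"
// so the instruction is printed as "sqxtn2 v31.8h, v0.4s".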
@@ -475,7 +475,7 @@

;; Extract the low half components of rn.
;; tmp1 = |c|a|
(tmp1 Reg (xtn64 rn $false))
(tmp1 Reg (xtn rn (ScalarSize.Size32)))

;; Sum the respective high half components.
;; rd = |dg+ch|be+af||dg+ch|be+af|
@@ -483,7 +483,7 @@

;; Extract the low half components of rm.
;; tmp2 = |g|e|
(tmp2 Reg (xtn64 rm $false))
(tmp2 Reg (xtn rm (ScalarSize.Size32)))

;; Shift the high half components, into the high half.
;; rd = |dg+ch << 32|be+af << 32|
@@ -1450,68 +1450,55 @@
(value_regs_get src 0))


;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y))))))
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
(if (zero_value y))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (fcmeq0 rn vec_size) vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x (splat (f32const (zero_value_f32 y))))))
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y)))
(if (zero_value y))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero cond rn vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) (splat (f32const (zero_value_f32 x))) y)))
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
(if (zero_value x))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (fcmeq0 rn vec_size) vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) (splat (f32const (zero_value_f32 x))) y)))
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y)))
(if (zero_value x))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero_swap cond rn vec_size))))

;;;; Rules for `fcmp` 64 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f64const (zero_value_f64 y))))))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (fcmeq0 rn vec_size) vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x (splat (f64const (zero_value_f64 y))))))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero cond rn vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) (splat (f64const (zero_value_f64 x))) y)))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (fcmeq0 rn vec_size) vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) (splat (f64const (zero_value_f64 x))) y)))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero_swap cond rn vec_size))))

;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x (splat (iconst (zero_value y))))))
(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y)))
(if (zero_value y))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (cmeq0 rn vec_size) vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x (splat (iconst (zero_value y))))))
(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y)))
(if (zero_value y))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (int_cmp_zero cond rn vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) (splat (iconst (zero_value x))) y)))
(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y)))
(if (zero_value x))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (cmeq0 rn vec_size) vec_size))))

(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) (splat (iconst (zero_value x))) y)))
(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y)))
(if (zero_value x))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (int_cmp_zero_swap cond rn vec_size))))
@@ -1624,3 +1611,53 @@
(rule (lower (and (has_type (valid_atomic_transaction ty)
(atomic_cas flags addr src1 src2))))
(atomic_cas_loop addr src1 src2 ty))


;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (fvdemote x))
(fcvtn x (ScalarSize.Size32)))


;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_vec128_int ty) (snarrow x y)))
(if (zero_value y))
(sqxtn x (lane_size ty)))

(rule (lower (has_type (ty_vec64_int ty) (snarrow x y)))
(let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
(sqxtn dst (lane_size ty))))

(rule (lower (has_type (ty_vec128_int ty) (snarrow x y)))
(let ((low_half Reg (sqxtn x (lane_size ty)))
(result Reg (sqxtn2 low_half y (lane_size ty))))
result))


;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_vec128_int ty) (unarrow x y)))
(if (zero_value y))
(sqxtun x (lane_size ty)))

(rule (lower (has_type (ty_vec64_int ty) (unarrow x y)))
(let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
(sqxtun dst (lane_size ty))))

(rule (lower (has_type (ty_vec128_int ty) (unarrow x y)))
(let ((low_half Reg (sqxtun x (lane_size ty)))
(result Reg (sqxtun2 low_half y (lane_size ty))))
result))


;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_vec128_int ty) (uunarrow x y)))
(if (zero_value y))
(uqxtn x (lane_size ty)))

(rule (lower (has_type (ty_vec64_int ty) (uunarrow x y)))
(let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
(uqxtn dst (lane_size ty))))

(rule (lower (has_type (ty_vec128_int ty) (uunarrow x y)))
(let ((low_half Reg (uqxtn x (lane_size ty)))
(result Reg (uqxtn2 low_half y (lane_size ty))))
result))
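For illustration (a sketch, not part of the patch), the instruction sequences the 128-bit `snarrow` rules above select: when the second operand is known to be all zeroes a single narrowing instruction suffices, otherwise each half is narrowed in turn into the same destination register (register numbers below are placeholders).

// snarrow producing an i16x8 result, sketched as the selected AArch64 code:
//
//   second operand matched by `zero_value`:
//     sqxtn  v0.4h, v1.4s    // low half only; the upper 64 bits end up zero
//
//   general case:
//     sqxtn  v0.4h, v1.4s    // low half from the first operand
//     sqxtn2 v0.8h, v2.4s    // high half from the second operand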
@@ -424,25 +424,4 @@ where
            _ => panic!(),
        }
    }

    fn zero_value(&mut self, value: Imm64) -> Option<Imm64> {
        if value.bits() == 0 {
            return Some(value);
        }
        None
    }

    fn zero_value_f32(&mut self, value: Ieee32) -> Option<Ieee32> {
        if value.bits() == 0 {
            return Some(value);
        }
        None
    }

    fn zero_value_f64(&mut self, value: Ieee64) -> Option<Ieee64> {
        if value.bits() == 0 {
            return Some(value);
        }
        None
    }
}
@@ -47,6 +47,48 @@
(vec_rrr (VecALUOp.Fcmgt) (put_in_reg y) (put_in_reg x)
(vector_size ty)) (put_in_reg y) (put_in_reg x))))

;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_dyn128_int ty) (snarrow x y)))
(if-let _ (zero_value y))
(sqxtn x (lane_size ty)))

(rule (lower (has_type (ty_dyn64_int ty) (snarrow x y)))
(let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
(sqxtn dst (lane_size ty))))

(rule (lower (has_type (ty_dyn128_int ty) (snarrow x y)))
(let ((low_half Reg (sqxtn x (lane_size ty)))
(result Reg (sqxtn2 low_half y (lane_size ty))))
result))

;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_dyn128_int ty) (unarrow x y)))
(if-let _ (zero_value y))
(sqxtun x (lane_size ty)))

(rule (lower (has_type (ty_dyn64_int ty) (unarrow x y)))
(let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
(sqxtun dst (lane_size ty))))

(rule (lower (has_type (ty_dyn128_int ty) (unarrow x y)))
(let ((low_half Reg (sqxtun x (lane_size ty)))
(result Reg (sqxtun2 low_half y (lane_size ty))))
result))

;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_dyn128_int ty) (uunarrow x y)))
(if-let _ (zero_value y))
(uqxtn x (lane_size ty)))

(rule (lower (has_type (ty_dyn64_int ty) (uunarrow x y)))
(let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2))))
(uqxtn dst (lane_size ty))))

(rule (lower (has_type (ty_dyn128_int ty) (uunarrow x y)))
(let ((low_half Reg (uqxtn x (lane_size ty)))
(result Reg (uqxtn2 low_half y (lane_size ty))))
result))

;;; Rules for `dynamic_stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (dynamic_stack_addr stack_slot))
(let ((dst WritableReg (temp_writable_reg $I64))
@@ -1767,57 +1767,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
    });
}

Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => {
    let nonzero_high_half = maybe_input_insn(ctx, inputs[1], Opcode::Vconst)
        .map_or(true, |insn| {
            const_param_to_u128(ctx, insn).expect("Invalid immediate bytes") != 0
        });
    let ty = ty.unwrap();
    let ty = if ty.is_dynamic_vector() {
        ty.dynamic_to_vector()
            .unwrap_or_else(|| panic!("Unsupported dynamic type: {}?", ty))
    } else {
        ty
    };

    let op = match (op, ty) {
        (Opcode::Snarrow, I8X16) => VecRRNarrowOp::Sqxtn16,
        (Opcode::Snarrow, I16X8) => VecRRNarrowOp::Sqxtn32,
        (Opcode::Snarrow, I32X4) => VecRRNarrowOp::Sqxtn64,
        (Opcode::Unarrow, I8X16) => VecRRNarrowOp::Sqxtun16,
        (Opcode::Unarrow, I16X8) => VecRRNarrowOp::Sqxtun32,
        (Opcode::Unarrow, I32X4) => VecRRNarrowOp::Sqxtun64,
        (Opcode::Uunarrow, I8X16) => VecRRNarrowOp::Uqxtn16,
        (Opcode::Uunarrow, I16X8) => VecRRNarrowOp::Uqxtn32,
        (Opcode::Uunarrow, I32X4) => VecRRNarrowOp::Uqxtn64,
        (_, ty) => {
            return Err(CodegenError::Unsupported(format!(
                "{}: Unsupported type: {:?}",
                op, ty
            )))
        }
    };
    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);

    ctx.emit(Inst::VecRRNarrow {
        op,
        rd,
        rn,
        high_half: false,
    });

    if nonzero_high_half {
        let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);

        ctx.emit(Inst::VecRRNarrow {
            op,
            rd,
            rn,
            high_half: true,
        });
    }
}
Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => implemented_in_isle(ctx),
Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
@@ -1940,19 +1890,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
    });
}

Opcode::Fvdemote => {
    debug_assert_eq!(ty.unwrap(), F32X4);

    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);

    ctx.emit(Inst::VecRRNarrow {
        op: VecRRNarrowOp::Fcvtn64,
        rd,
        rn,
        high_half: false,
    });
}
Opcode::Fvdemote => implemented_in_isle(ctx),

Opcode::ExtractVector => implemented_in_isle(ctx),