Enable the simd_i32x4_trunc_sat_f64x2 test for AArch64
Also, reorganize the AArch64-specific VCode instructions for unary narrowing and widening vector operations, so that they are more straightforward to use. Copyright (c) 2021, Arm Limited.
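For reference, a minimal scalar sketch (not part of the patch) of the operation that the newly enabled test exercises: i32x4.trunc_sat_f64x2_u_zero truncates each f64 lane toward zero, saturates it to the unsigned 32-bit range, and zeroes the two upper result lanes. The function name and layout below are illustrative only.

// Scalar model of i32x4.trunc_sat_f64x2_u_zero: truncate toward zero,
// saturate to the u32 range (NaN becomes 0), zero the two upper lanes.
fn i32x4_trunc_sat_f64x2_u_zero(x: [f64; 2]) -> [u32; 4] {
    fn sat_u32(v: f64) -> u32 {
        if v.is_nan() || v <= 0.0 {
            0
        } else if v >= u32::MAX as f64 {
            u32::MAX
        } else {
            v as u32 // `as` truncates toward zero
        }
    }
    [sat_u32(x[0]), sat_u32(x[1]), 0, 0]
}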
build.rs
@@ -233,7 +233,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
| ("simd", "simd_i16x8_extmul_i8x16")
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
| ("simd", "simd_i32x4_extmul_i16x8")
| ("simd", "simd_i32x4_trunc_sat_f64x2")
| ("simd", "simd_i64x2_extmul_i32x4") => return true,

_ => {}
@@ -3985,19 +3985,19 @@ pub(crate) fn define(
.constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
);

let I16or32xN = &TypeVar::new(
"I16or32xN",
"A SIMD vector type containing integer lanes 16 or 32 bits wide",
let I16or32or64xN = &TypeVar::new(
"I16or32or64xN",
"A SIMD vector type containing integer lanes 16, 32, or 64 bits wide",
TypeSetBuilder::new()
.ints(16..32)
.simd_lanes(4..8)
.ints(16..64)
.simd_lanes(2..8)
.includes_scalars(false)
.build(),
);

let x = &Operand::new("x", I16or32xN);
let y = &Operand::new("y", I16or32xN);
let a = &Operand::new("a", &I16or32xN.split_lanes());
let x = &Operand::new("x", I16or32or64xN);
let y = &Operand::new("y", I16or32or64xN);
let a = &Operand::new("a", &I16or32or64xN.split_lanes());

ig.push(
Inst::new(

@@ -4036,6 +4036,25 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

ig.push(
Inst::new(
"uunarrow",
r#"
Combine `x` and `y` into a vector with twice the lanes but half the integer width while
saturating overflowing values to the unsigned maximum and minimum.

Note that all input lanes are considered unsigned.

The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

let I8or16or32xN = &TypeVar::new(
"I8or16or32xN",
"A SIMD vector type containing integer lanes 8, 16, or 32 bits wide.",
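As a rough illustration of the lane ordering described by the new `uunarrow` instruction above — a sketch assuming `i32x4` inputs whose lanes are treated as unsigned, not code from the patch:

// uunarrow on two i32x4 vectors, modelled with unsigned lanes: each lane is
// saturated to the u16 range; `x` fills the low result lanes, `y` the high ones.
fn uunarrow_i32x4(x: [u32; 4], y: [u32; 4]) -> [u16; 8] {
    let narrow = |v: u32| if v > u16::MAX as u32 { u16::MAX } else { v as u16 };
    let mut a = [0u16; 8];
    for i in 0..4 {
        a[i] = narrow(x[i]);     // x0'..x3' -> lanes 0..3
        a[i + 4] = narrow(y[i]); // y0'..y3' -> lanes 4..7
    }
    a
}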
@@ -1677,11 +1677,6 @@ impl MachInstEmit for Inst {
debug_assert_ne!(VectorSize::Size64x2, size);
(0b0, 0b00000, enc_size)
}
VecMisc2::Shll => {
debug_assert_ne!(VectorSize::Size64x2, size);
debug_assert!(!size.is_128bits());
(0b1, 0b10011, enc_size)
}
VecMisc2::Fcvtzs => {
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
(0b0, 0b11011, enc_size)

@@ -2092,24 +2087,49 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecMiscNarrow {
&Inst::VecRRLong {
op,
rd,
rn,
size,
high_half,
} => {
let size = match size.lane_size() {
ScalarSize::Size8 => 0b00,
ScalarSize::Size16 => 0b01,
ScalarSize::Size32 => 0b10,
_ => panic!("Unexpected vector operand lane size!"),
let (u, size, bits_12_16) = match op {
VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
};
let (u, bits_12_16) = match op {
VecMiscNarrowOp::Xtn => (0b0, 0b10010),
VecMiscNarrowOp::Sqxtn => (0b0, 0b10100),
VecMiscNarrowOp::Sqxtun => (0b1, 0b10010),

sink.put4(enc_vec_rr_misc(
((high_half as u32) << 1) | u,
size,
bits_12_16,
rd,
rn,
));
}
&Inst::VecRRNarrow {
op,
rd,
rn,
high_half,
} => {
let (u, size, bits_12_16) = match op {
VecRRNarrowOp::Xtn16 => (0b0, 0b00, 0b10010),
VecRRNarrowOp::Xtn32 => (0b0, 0b01, 0b10010),
VecRRNarrowOp::Xtn64 => (0b0, 0b10, 0b10010),
VecRRNarrowOp::Sqxtn16 => (0b0, 0b00, 0b10100),
VecRRNarrowOp::Sqxtn32 => (0b0, 0b01, 0b10100),
VecRRNarrowOp::Sqxtn64 => (0b0, 0b10, 0b10100),
VecRRNarrowOp::Sqxtun16 => (0b1, 0b00, 0b10010),
VecRRNarrowOp::Sqxtun32 => (0b1, 0b01, 0b10010),
VecRRNarrowOp::Sqxtun64 => (0b1, 0b10, 0b10010),
VecRRNarrowOp::Uqxtn16 => (0b1, 0b00, 0b10100),
VecRRNarrowOp::Uqxtn32 => (0b1, 0b01, 0b10100),
VecRRNarrowOp::Uqxtn64 => (0b1, 0b10, 0b10100),
};

sink.put4(enc_vec_rr_misc(
((high_half as u32) << 1) | u,
size,
@@ -2425,11 +2425,87 @@ fn test_aarch64_binemit() {
));

insns.push((
Inst::VecMiscNarrow {
op: VecMiscNarrowOp::Xtn,
Inst::VecRRLong {
op: VecRRLongOp::Fcvtl16,
rd: writable_vreg(0),
rn: vreg(30),
high_half: false,
},
"C07B210E",
"fcvtl v0.4s, v30.4h",
));

insns.push((
Inst::VecRRLong {
op: VecRRLongOp::Fcvtl32,
rd: writable_vreg(16),
rn: vreg(1),
high_half: true,
},
"3078614E",
"fcvtl2 v16.2d, v1.4s",
));

insns.push((
Inst::VecRRLong {
op: VecRRLongOp::Shll8,
rd: writable_vreg(12),
rn: vreg(5),
high_half: false,
},
"AC38212E",
"shll v12.8h, v5.8b, #8",
));

insns.push((
Inst::VecRRLong {
op: VecRRLongOp::Shll16,
rd: writable_vreg(9),
rn: vreg(1),
high_half: true,
},
"2938616E",
"shll2 v9.4s, v1.8h, #16",
));

insns.push((
Inst::VecRRLong {
op: VecRRLongOp::Shll32,
rd: writable_vreg(1),
rn: vreg(10),
high_half: false,
},
"4139A12E",
"shll v1.2d, v10.2s, #32",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Xtn16,
rd: writable_vreg(25),
rn: vreg(17),
high_half: false,
},
"392A210E",
"xtn v25.8b, v17.8h",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Xtn32,
rd: writable_vreg(3),
rn: vreg(10),
high_half: true,
},
"4329614E",
"xtn2 v3.8h, v10.4s",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Xtn64,
rd: writable_vreg(22),
rn: vreg(8),
size: VectorSize::Size32x2,
high_half: false,
},
"1629A10E",

@@ -2437,11 +2513,21 @@ fn test_aarch64_binemit() {
));

insns.push((
Inst::VecMiscNarrow {
op: VecMiscNarrowOp::Sqxtn,
Inst::VecRRNarrow {
op: VecRRNarrowOp::Sqxtn16,
rd: writable_vreg(7),
rn: vreg(22),
high_half: true,
},
"C74A214E",
"sqxtn2 v7.16b, v22.8h",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Sqxtn32,
rd: writable_vreg(31),
rn: vreg(0),
size: VectorSize::Size16x8,
high_half: true,
},
"1F48614E",

@@ -2449,17 +2535,82 @@ fn test_aarch64_binemit() {
));

insns.push((
Inst::VecMiscNarrow {
op: VecMiscNarrowOp::Sqxtun,
Inst::VecRRNarrow {
op: VecRRNarrowOp::Sqxtn64,
rd: writable_vreg(14),
rn: vreg(20),
high_half: false,
},
"8E4AA10E",
"sqxtn v14.2s, v20.2d",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Sqxtun16,
rd: writable_vreg(16),
rn: vreg(23),
size: VectorSize::Size8x16,
high_half: false,
},
"F02A212E",
"sqxtun v16.8b, v23.8h",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Sqxtun32,
rd: writable_vreg(28),
rn: vreg(9),
high_half: true,
},
"3C29616E",
"sqxtun2 v28.8h, v9.4s",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Sqxtun64,
rd: writable_vreg(15),
rn: vreg(15),
high_half: false,
},
"EF29A12E",
"sqxtun v15.2s, v15.2d",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Uqxtn16,
rd: writable_vreg(21),
rn: vreg(4),
high_half: true,
},
"9548216E",
"uqxtn2 v21.16b, v4.8h",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Uqxtn32,
rd: writable_vreg(31),
rn: vreg(31),
high_half: false,
},
"FF4B612E",
"uqxtn v31.4h, v31.4s",
));

insns.push((
Inst::VecRRNarrow {
op: VecRRNarrowOp::Uqxtn64,
rd: writable_vreg(11),
rn: vreg(12),
high_half: true,
},
"8B49A16E",
"uqxtn2 v11.4s, v12.2d",
));

insns.push((
Inst::VecRRPair {
op: VecPairOp::Addp,

@@ -3810,39 +3961,6 @@ fn test_aarch64_binemit() {
"rev64 v1.4s, v10.4s",
));

insns.push((
Inst::VecMisc {
op: VecMisc2::Shll,
rd: writable_vreg(12),
rn: vreg(5),
size: VectorSize::Size8x8,
},
"AC38212E",
"shll v12.8h, v5.8b, #8",
));

insns.push((
Inst::VecMisc {
op: VecMisc2::Shll,
rd: writable_vreg(9),
rn: vreg(1),
size: VectorSize::Size16x4,
},
"2938612E",
"shll v9.4s, v1.4h, #16",
));

insns.push((
Inst::VecMisc {
op: VecMisc2::Shll,
rd: writable_vreg(1),
rn: vreg(10),
size: VectorSize::Size32x2,
},
"4139A12E",
"shll v1.2d, v10.2s, #32",
));

insns.push((
Inst::VecMisc {
op: VecMisc2::Fcvtzs,
@@ -332,8 +332,6 @@ pub enum VecMisc2 {
Fsqrt,
/// Reverse elements in 64-bit doublewords
Rev64,
/// Shift left long (by element size)
Shll,
/// Floating-point convert to signed integer, rounding toward zero
Fcvtzs,
/// Floating-point convert to unsigned integer, rounding toward zero

@@ -356,15 +354,48 @@ pub enum VecMisc2 {
Cmeq0,
}

/// A Vector narrowing operation with two registers.
/// A vector widening operation with one argument.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecMiscNarrowOp {
/// Extract Narrow
Xtn,
/// Signed saturating extract narrow
Sqxtn,
/// Signed saturating extract unsigned narrow
Sqxtun,
pub enum VecRRLongOp {
/// Floating-point convert to higher precision long, 16-bit elements
Fcvtl16,
/// Floating-point convert to higher precision long, 32-bit elements
Fcvtl32,
/// Shift left long (by element size), 8-bit elements
Shll8,
/// Shift left long (by element size), 16-bit elements
Shll16,
/// Shift left long (by element size), 32-bit elements
Shll32,
}

/// A vector narrowing operation with one argument.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecRRNarrowOp {
/// Extract narrow, 16-bit elements
Xtn16,
/// Extract narrow, 32-bit elements
Xtn32,
/// Extract narrow, 64-bit elements
Xtn64,
/// Signed saturating extract narrow, 16-bit elements
Sqxtn16,
/// Signed saturating extract narrow, 32-bit elements
Sqxtn32,
/// Signed saturating extract narrow, 64-bit elements
Sqxtn64,
/// Signed saturating extract unsigned narrow, 16-bit elements
Sqxtun16,
/// Signed saturating extract unsigned narrow, 32-bit elements
Sqxtun32,
/// Signed saturating extract unsigned narrow, 64-bit elements
Sqxtun64,
/// Unsigned saturating extract narrow, 16-bit elements
Uqxtn16,
/// Unsigned saturating extract narrow, 32-bit elements
Uqxtn32,
/// Unsigned saturating extract narrow, 64-bit elements
Uqxtn64,
}

/// A vector operation on a pair of elements with one register.

@@ -1029,12 +1060,19 @@ pub enum Inst {
size: VectorSize,
},

/// Vector narrowing operation.
VecMiscNarrow {
op: VecMiscNarrowOp,
/// Vector widening operation.
VecRRLong {
op: VecRRLongOp,
rd: Writable<Reg>,
rn: Reg,
high_half: bool,
},

/// Vector narrowing operation.
VecRRNarrow {
op: VecRRNarrowOp,
rd: Writable<Reg>,
rn: Reg,
size: VectorSize,
high_half: bool,
},
@@ -2073,7 +2111,11 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_mod(rd);
collector.add_use(rn);
}
&Inst::VecMiscNarrow {
&Inst::VecRRLong { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecRRNarrow {
rd, rn, high_half, ..
} => {
collector.add_use(rn);

@@ -2868,7 +2910,15 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_mod(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecMiscNarrow {
&mut Inst::VecRRLong {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecRRNarrow {
ref mut rd,
ref mut rn,
high_half,
@@ -3901,29 +3951,132 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, src_idx, size);
format!("mov {}, {}", rd, rn)
}
&Inst::VecMiscNarrow {
&Inst::VecRRLong {
op,
rd,
rn,
size,
high_half,
} => {
let dest_size = if high_half {
assert!(size.is_128bits());
size
} else {
size.halve()
let (op, rd_size, size, suffix) = match (op, high_half) {
(VecRRLongOp::Fcvtl16, false) => {
("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
}
(VecRRLongOp::Fcvtl16, true) => {
("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
}
(VecRRLongOp::Fcvtl32, false) => {
("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
}
(VecRRLongOp::Fcvtl32, true) => {
("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
}
(VecRRLongOp::Shll8, false) => {
("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
}
(VecRRLongOp::Shll8, true) => {
("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
}
(VecRRLongOp::Shll16, false) => {
("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
}
(VecRRLongOp::Shll16, true) => {
("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
}
(VecRRLongOp::Shll32, false) => {
("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
}
(VecRRLongOp::Shll32, true) => {
("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
}
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
let rn = show_vreg_vector(rn, mb_rru, size.widen());
let op = match (op, high_half) {
(VecMiscNarrowOp::Xtn, false) => "xtn",
(VecMiscNarrowOp::Xtn, true) => "xtn2",
(VecMiscNarrowOp::Sqxtn, false) => "sqxtn",
(VecMiscNarrowOp::Sqxtn, true) => "sqxtn2",
(VecMiscNarrowOp::Sqxtun, false) => "sqxtun",
(VecMiscNarrowOp::Sqxtun, true) => "sqxtun2",
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
let rn = show_vreg_vector(rn, mb_rru, size);

format!("{} {}, {}{}", op, rd, rn, suffix)
}
&Inst::VecRRNarrow {
op,
rd,
rn,
high_half,
} => {
let (op, rd_size, size) = match (op, high_half) {
(VecRRNarrowOp::Xtn16, false) => {
("xtn", VectorSize::Size8x8, VectorSize::Size16x8)
}
(VecRRNarrowOp::Xtn16, true) => {
("xtn2", VectorSize::Size8x16, VectorSize::Size16x8)
}
(VecRRNarrowOp::Xtn32, false) => {
("xtn", VectorSize::Size16x4, VectorSize::Size32x4)
}
(VecRRNarrowOp::Xtn32, true) => {
("xtn2", VectorSize::Size16x8, VectorSize::Size32x4)
}
(VecRRNarrowOp::Xtn64, false) => {
("xtn", VectorSize::Size32x2, VectorSize::Size64x2)
}
(VecRRNarrowOp::Xtn64, true) => {
("xtn2", VectorSize::Size32x4, VectorSize::Size64x2)
}
(VecRRNarrowOp::Sqxtn16, false) => {
("sqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
}
(VecRRNarrowOp::Sqxtn16, true) => {
("sqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
}
(VecRRNarrowOp::Sqxtn32, false) => {
("sqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
}
(VecRRNarrowOp::Sqxtn32, true) => {
("sqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
}
(VecRRNarrowOp::Sqxtn64, false) => {
("sqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
}
(VecRRNarrowOp::Sqxtn64, true) => {
("sqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
}
(VecRRNarrowOp::Sqxtun16, false) => {
("sqxtun", VectorSize::Size8x8, VectorSize::Size16x8)
}
(VecRRNarrowOp::Sqxtun16, true) => {
("sqxtun2", VectorSize::Size8x16, VectorSize::Size16x8)
}
(VecRRNarrowOp::Sqxtun32, false) => {
("sqxtun", VectorSize::Size16x4, VectorSize::Size32x4)
}
(VecRRNarrowOp::Sqxtun32, true) => {
("sqxtun2", VectorSize::Size16x8, VectorSize::Size32x4)
}
(VecRRNarrowOp::Sqxtun64, false) => {
("sqxtun", VectorSize::Size32x2, VectorSize::Size64x2)
}
(VecRRNarrowOp::Sqxtun64, true) => {
("sqxtun2", VectorSize::Size32x4, VectorSize::Size64x2)
}
(VecRRNarrowOp::Uqxtn16, false) => {
("uqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
}
(VecRRNarrowOp::Uqxtn16, true) => {
("uqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
}
(VecRRNarrowOp::Uqxtn32, false) => {
("uqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
}
(VecRRNarrowOp::Uqxtn32, true) => {
("uqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
}
(VecRRNarrowOp::Uqxtn64, false) => {
("uqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
}
(VecRRNarrowOp::Uqxtn64, true) => {
("uqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
}
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
let rn = show_vreg_vector(rn, mb_rru, size);

format!("{} {}, {}", op, rd, rn)
}
&Inst::VecRRPair { op, rd, rn } => {

@@ -3999,45 +4152,34 @@ impl Inst {
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::VecMisc { op, rd, rn, size } => {
let (op, rd_size, size, suffix) = match op {
VecMisc2::Not => {
let size = if size.is_128bits() {
let (op, size, suffix) = match op {
VecMisc2::Not => (
"mvn",
if size.is_128bits() {
VectorSize::Size8x16
} else {
VectorSize::Size8x8
};

("mvn", size, size, "")
}
VecMisc2::Neg => ("neg", size, size, ""),
VecMisc2::Abs => ("abs", size, size, ""),
VecMisc2::Fabs => ("fabs", size, size, ""),
VecMisc2::Fneg => ("fneg", size, size, ""),
VecMisc2::Fsqrt => ("fsqrt", size, size, ""),
VecMisc2::Rev64 => ("rev64", size, size, ""),
VecMisc2::Shll => (
"shll",
size.widen(),
size,
match size {
VectorSize::Size8x8 => ", #8",
VectorSize::Size16x4 => ", #16",
VectorSize::Size32x2 => ", #32",
_ => panic!("Unexpected vector size: {:?}", size),
},
"",
),
VecMisc2::Fcvtzs => ("fcvtzs", size, size, ""),
VecMisc2::Fcvtzu => ("fcvtzu", size, size, ""),
VecMisc2::Scvtf => ("scvtf", size, size, ""),
VecMisc2::Ucvtf => ("ucvtf", size, size, ""),
VecMisc2::Frintn => ("frintn", size, size, ""),
VecMisc2::Frintz => ("frintz", size, size, ""),
VecMisc2::Frintm => ("frintm", size, size, ""),
VecMisc2::Frintp => ("frintp", size, size, ""),
VecMisc2::Cnt => ("cnt", size, size, ""),
VecMisc2::Cmeq0 => ("cmeq", size, size, ", #0"),
VecMisc2::Neg => ("neg", size, ""),
VecMisc2::Abs => ("abs", size, ""),
VecMisc2::Fabs => ("fabs", size, ""),
VecMisc2::Fneg => ("fneg", size, ""),
VecMisc2::Fsqrt => ("fsqrt", size, ""),
VecMisc2::Rev64 => ("rev64", size, ""),
VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
VecMisc2::Scvtf => ("scvtf", size, ""),
VecMisc2::Ucvtf => ("ucvtf", size, ""),
VecMisc2::Frintn => ("frintn", size, ""),
VecMisc2::Frintz => ("frintz", size, ""),
VecMisc2::Frintm => ("frintm", size, ""),
VecMisc2::Frintp => ("frintp", size, ""),
VecMisc2::Cnt => ("cnt", size, ""),
VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}{}", op, rd, rn, suffix)
}
@@ -365,11 +365,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

// Extract the low half components of rn.
// tmp1 = |c|a|
ctx.emit(Inst::VecMiscNarrow {
op: VecMiscNarrowOp::Xtn,
ctx.emit(Inst::VecRRNarrow {
op: VecRRNarrowOp::Xtn64,
rd: tmp1,
rn,
size: VectorSize::Size32x2,
high_half: false,
});

@@ -385,21 +384,20 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

// Extract the low half components of rm.
// tmp2 = |g|e|
ctx.emit(Inst::VecMiscNarrow {
op: VecMiscNarrowOp::Xtn,
ctx.emit(Inst::VecRRNarrow {
op: VecRRNarrowOp::Xtn64,
rd: tmp2,
rn: rm,
size: VectorSize::Size32x2,
high_half: false,
});

// Shift the high half components, into the high half.
// rd = |dg+ch << 32|be+af << 32|
ctx.emit(Inst::VecMisc {
op: VecMisc2::Shll,
ctx.emit(Inst::VecRRLong {
op: VecRRLongOp::Shll32,
rd,
rn: rd.to_reg(),
size: VectorSize::Size32x2,
high_half: false,
});

// Multiply the low components together, and accumulate with the high

@@ -3439,31 +3437,48 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::Snarrow | Opcode::Unarrow => {
let op = if op == Opcode::Snarrow {
VecMiscNarrowOp::Sqxtn
} else {
VecMiscNarrowOp::Sqxtun
Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => {
let nonzero_high_half = maybe_input_insn(ctx, inputs[1], Opcode::Vconst)
.map_or(true, |insn| {
const_param_to_u128(ctx, insn).expect("Invalid immediate bytes") != 0
});
let op = match (op, ty.unwrap().lane_type()) {
(Opcode::Snarrow, I8) => VecRRNarrowOp::Sqxtn16,
(Opcode::Snarrow, I16) => VecRRNarrowOp::Sqxtn32,
(Opcode::Snarrow, I32) => VecRRNarrowOp::Sqxtn64,
(Opcode::Unarrow, I8) => VecRRNarrowOp::Sqxtun16,
(Opcode::Unarrow, I16) => VecRRNarrowOp::Sqxtun32,
(Opcode::Unarrow, I32) => VecRRNarrowOp::Sqxtun64,
(Opcode::Uunarrow, I8) => VecRRNarrowOp::Uqxtn16,
(Opcode::Uunarrow, I16) => VecRRNarrowOp::Uqxtn32,
(Opcode::Uunarrow, I32) => VecRRNarrowOp::Uqxtn64,
(_, lane_type) => {
return Err(CodegenError::Unsupported(format!(
"Unsupported SIMD vector lane type: {:?}",
lane_type
)))
}
};
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();

ctx.emit(Inst::VecMiscNarrow {
ctx.emit(Inst::VecRRNarrow {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
high_half: false,
});
ctx.emit(Inst::VecMiscNarrow {
op,
rd,
rn: rn2,
size: VectorSize::from_ty(ty),
high_half: true,
});

if nonzero_high_half {
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);

ctx.emit(Inst::VecRRNarrow {
op,
rd,
rn,
high_half: true,
});
}
}

Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
@@ -2860,6 +2860,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::ScalarToVector
| Opcode::Snarrow
| Opcode::Unarrow
| Opcode::Uunarrow
| Opcode::SwidenLow
| Opcode::SwidenHigh
| Opcode::UwidenLow

@@ -6001,7 +6001,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
unimplemented!("Vector split/concat ops not implemented.");
}

Opcode::SqmulRoundSat => unimplemented!("unimplemented lowering for opcode {:?}", op),
Opcode::SqmulRoundSat | Opcode::Uunarrow => {
unimplemented!("unimplemented lowering for opcode {:?}", op)
}

// Opcodes that should be removed by legalization. These should
// eventually be removed if/when we replace in-situ legalization with
Binary file not shown.
@@ -530,6 +530,7 @@ where
arg(0)?,
ValueConversionKind::Truncate(ctrl_ty),
)?),
Opcode::Uunarrow => unimplemented!("Uunarrow"),
Opcode::Uextend => assign(Value::convert(
arg(0)?,
ValueConversionKind::ZeroExtend(ctrl_ty),
@@ -1790,10 +1790,26 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
}
Operator::I32x4TruncSatF64x2SZero => {
let a = pop1_with_bitcast(state, F64X2, builder);
let converted_a = builder.ins().fcvt_to_sint_sat(I64X2, a);
let handle = builder.func.dfg.constants.insert(vec![0u8; 16].into());
let zero = builder.ins().vconst(I64X2, handle);

state.push1(builder.ins().snarrow(converted_a, zero));
}
Operator::I32x4TruncSatF32x4U => {
let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fcvt_to_uint_sat(I32X4, a))
}
Operator::I32x4TruncSatF64x2UZero => {
let a = pop1_with_bitcast(state, F64X2, builder);
let converted_a = builder.ins().fcvt_to_uint_sat(I64X2, a);
let handle = builder.func.dfg.constants.insert(vec![0u8; 16].into());
let zero = builder.ins().vconst(I64X2, handle);

state.push1(builder.ins().uunarrow(converted_a, zero));
}
Operator::I8x16NarrowI16x8S => {
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
state.push1(builder.ins().snarrow(a, b))

@@ -1906,9 +1922,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I16x8ExtAddPairwiseI8x16U
| Operator::I32x4ExtAddPairwiseI16x8S
| Operator::I32x4ExtAddPairwiseI16x8U
| Operator::F64x2ConvertLowI32x4U
| Operator::I32x4TruncSatF64x2SZero
| Operator::I32x4TruncSatF64x2UZero => {
| Operator::F64x2ConvertLowI32x4U => {
return Err(wasm_unsupported!("proposed simd operator {:?}", op));
}
Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {