AArch64: Implement SIMD conversions
Copyright (c) 2020, Arm Limited.
@@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
use crate::{CodegenError, CodegenResult};

use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
@@ -66,7 +66,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
ctx,
inputs[1],
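Several of the hunks below make the same one-line substitution: the scalar path used to be guarded by a bit-width test and is now guarded by a direct vector check. A minimal stand-alone sketch of the intent, using made-up stand-in types rather than Cranelift's `Type`/`ty_bits` (all names here are illustrative only):

    // Stand-ins for Cranelift's Type and ty_bits; not the real API.
    #[derive(Clone, Copy)]
    enum Ty {
        I32,
        I64,
        I32X4,
    }

    fn bits(ty: Ty) -> u32 {
        match ty {
            Ty::I32 => 32,
            Ty::I64 => 64,
            Ty::I32X4 => 128,
        }
    }

    fn is_vector(ty: Ty) -> bool {
        matches!(ty, Ty::I32X4)
    }

    // The old guard asked "narrower than 128 bits?"; the new one asks the real
    // question, "is this a scalar?", which keeps stating the intent even if a
    // 128-bit scalar type ever reaches these arms.
    fn takes_scalar_path(ty: Ty) -> bool {
        !is_vector(ty) // previously: bits(ty) < 128
    }

    fn main() {
        assert!(takes_scalar_path(Ty::I64));
        assert!(!takes_scalar_path(Ty::I32X4));
        // The two guards agree for the types handled in this file today:
        assert_eq!(takes_scalar_path(Ty::I32), bits(Ty::I32) < 128);
    }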
@@ -94,7 +94,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
ctx,
inputs[1],
@@ -124,7 +124,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
let ty = ty.unwrap();
let rd = get_output_reg(ctx, outputs[0]);
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let narrow_mode = if is_signed {
NarrowValueMode::SignExtend64
} else {
@@ -180,7 +180,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Ineg => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rn = zero_reg();
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
@@ -201,7 +201,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
ctx.emit(Inst::AluRRRR {
alu_op,
@@ -274,6 +274,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd: tmp1,
rn,
size: VectorSize::Size32x2,
high_half: false,
});

// Sum the respective high half components.
@@ -293,6 +294,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd: tmp2,
rn: rm,
size: VectorSize::Size32x2,
high_half: false,
});

// Shift the high half components, into the high half.
@@ -570,7 +572,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Bnot => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
// NOT rd, rm ==> ORR_NOT rd, zero, rm
@@ -594,7 +596,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::BxorNot => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
let alu_op = match op {
@@ -633,7 +635,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
let ty = ty.unwrap();
let rd = get_output_reg(ctx, outputs[0]);
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let size = OperandSize::from_bits(ty_bits(ty));
let narrow_mode = match (op, size) {
(Opcode::Ishl, _) => NarrowValueMode::None,
@@ -1159,6 +1161,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
t,
rd,
rn: rd.to_reg(),
high_half: false,
});
}
}
@@ -1433,7 +1436,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

Opcode::Bitselect | Opcode::Vselect => {
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
debug_assert_ne!(Opcode::Vselect, op);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let rd = get_output_reg(ctx, outputs[0]);
@@ -1696,7 +1699,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);

if ty_bits(ty) < 128 {
if !ty.is_vector() {
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
@@ -1716,7 +1719,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);

if ty_bits(ty) < 128 {
if !ty.is_vector() {
match ty_bits(ty) {
32 => {
ctx.emit(Inst::FpuCmp32 { rn, rm });
@@ -2106,7 +2109,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
if bits < 128 {
if !ty.is_vector() {
let fpu_op = match (op, bits) {
(Opcode::Fadd, 32) => FPUOp2::Add32,
(Opcode::Fadd, 64) => FPUOp2::Add64,
@@ -2149,7 +2152,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let bits = ty_bits(ty);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
if bits < 128 {
if !ty.is_vector() {
let fpu_op = match (op, bits) {
(Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
(Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
@@ -2414,153 +2417,186 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}

Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
let in_bits = ty_bits(ctx.input_ty(insn, 0));
let out_bits = ty_bits(ctx.output_ty(insn, 0));
let ty = ty.unwrap();
let signed = op == Opcode::FcvtFromSint;
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => panic!("Unknown input/output-bits combination"),
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => panic!("Unknown input size"),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::IntToFpu { op, rd, rn });

if ty.is_vector() {
let op = if signed {
VecMisc2::Scvtf
} else {
VecMisc2::Ucvtf
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);

ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(ctx.input_ty(insn, 0));
let out_bits = ty_bits(ty);
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => panic!("Unknown input/output-bits combination"),
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => panic!("Unknown input size"),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
ctx.emit(Inst::IntToFpu { op, rd, rn });
}
}

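In the scalar branch of this arm, a narrow integer input is first zero- or sign-extended (the `narrow_mode` choice) and only then converted, while the new vector branch applies UCVTF/SCVTF lane-wise. A self-contained sketch of that scalar semantics, illustrative only and not the backend code itself:

    // Model of "extend, then convert", matching the narrow_mode + IntToFpuOp pairing.
    fn u16_to_f32(x: u16) -> f32 {
        // ZeroExtend32 followed by U32ToF32:
        (x as u32) as f32
    }

    fn i8_to_f64(x: i8) -> f64 {
        // SignExtend32 followed by I32ToF64:
        (x as i32) as f64
    }

    fn main() {
        assert_eq!(u16_to_f32(0xFFFF), 65535.0);
        assert_eq!(i8_to_f64(-5), -5.0);
    }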
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
let in_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(in_ty);
let out_ty = ctx.output_ty(insn, 0);
let out_bits = ty_bits(out_ty);
let ty = ty.unwrap();
let out_signed = op == Opcode::FcvtToSintSat;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);

// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2
if ty.is_vector() {
let op = if out_signed {
VecMisc2::Fcvtzs
} else {
VecMisc2::Fcvtzu
};

assert!(in_bits == 32 || in_bits == 64);
assert!(out_bits == 32 || out_bits == 64);

let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};

let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};

let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);

if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: max as f32,
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: max,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
rd: rtmp2,
rn: rn,
rm: rtmp1.to_reg(),
});
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: min as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: min,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
rd: rtmp2,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
if out_signed {
let in_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(in_ty);
let out_bits = ty_bits(ty);
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2

assert!(in_bits == 32 || in_bits == 64);
assert!(out_bits == 32 || out_bits == 64);

let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};

let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};

let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);

if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: 0.0,
const_data: max as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: 0.0,
const_data: max,
});
}
}
if in_bits == 32 {
ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel32 {
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
rn: rn,
rm: rtmp1.to_reg(),
});
} else {
ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel64 {
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: min as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: min,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
}
if out_signed {
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: 0.0,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: 0.0,
});
}
}
if in_bits == 32 {
ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel32 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
} else {
ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel64 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
}

let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
}
}

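The comment block in this arm spells out the scalar saturating strategy: clamp the input into the destination range with FMIN/FMAX, route NaN to zero with FCMP/FCSEL, then truncate; the new vector branch instead relies on FCVTZS/FCVTZU, which already saturate per lane. A plain-Rust model of the scalar sequence for f32 to i32, illustrative only:

    // Mirrors the FMIN / FMAX / FCMP+FCSEL / convert sequence described above.
    fn fcvt_to_sint_sat(x: f32) -> i32 {
        let max = i32::MAX as f32; // FIMM Vtmp1, i32::MAX
        let min = i32::MIN as f32; // FIMM Vtmp1, i32::MIN
        let clamped = x.min(max).max(min); // FMIN, then FMAX
        // FCMP Vin, Vin is unordered only for NaN; FCSEL then picks 0.
        let selected = if x.is_nan() { 0.0 } else { clamped };
        // Truncate toward zero, as the final conversion does.
        selected as i32
    }

    fn main() {
        assert_eq!(fcvt_to_sint_sat(f32::NAN), 0);
        assert_eq!(fcvt_to_sint_sat(-1.0e10), i32::MIN);
        assert_eq!(fcvt_to_sint_sat(1.5), 1);
    }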
Opcode::IaddIfcout => {
@@ -2689,12 +2725,62 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::Snarrow
| Opcode::Unarrow
| Opcode::SwidenLow
| Opcode::SwidenHigh
| Opcode::UwidenLow
| Opcode::UwidenHigh => unimplemented!(),
Opcode::Snarrow | Opcode::Unarrow => {
let op = if op == Opcode::Snarrow {
VecMiscNarrowOp::Sqxtn
} else {
VecMiscNarrowOp::Sqxtun
};
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();

ctx.emit(Inst::VecMiscNarrow {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
high_half: false,
});
ctx.emit(Inst::VecMiscNarrow {
op,
rd,
rn: rn2,
size: VectorSize::from_ty(ty),
high_half: true,
});
}

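Snarrow and Unarrow consume two full-width inputs, so the arm above emits the narrowing instruction twice, the second time with high_half: true so the second source fills the upper lanes; SQXTUN is chosen for Unarrow because its result saturates into the unsigned range. A lane-wise model for a pair of i32x4 inputs narrowed to 16-bit lanes, illustrative only:

    // Each lane is saturated into the destination lane type; the second source
    // vector fills lanes 4..8, like the high_half: true emission above.
    fn snarrow(lo: [i32; 4], hi: [i32; 4]) -> [i16; 8] {
        let mut out = [0i16; 8];
        for (i, &v) in lo.iter().chain(hi.iter()).enumerate() {
            out[i] = v.clamp(i16::MIN as i32, i16::MAX as i32) as i16;
        }
        out
    }

    fn unarrow(lo: [i32; 4], hi: [i32; 4]) -> [u16; 8] {
        let mut out = [0u16; 8];
        for (i, &v) in lo.iter().chain(hi.iter()).enumerate() {
            out[i] = v.clamp(0, u16::MAX as i32) as u16;
        }
        out
    }

    fn main() {
        let n = snarrow([70_000, -70_000, 1, 2], [3, 4, 5, 6]);
        assert_eq!(n, [i16::MAX, i16::MIN, 1, 2, 3, 4, 5, 6]);
        let u = unarrow([70_000, -1, 3, 4], [0, 0, 0, 0]);
        assert_eq!(u, [u16::MAX, 0, 3, 4, 0, 0, 0, 0]);
    }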
Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
let lane_type = ty.unwrap().lane_type();
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let (t, high_half) = match (lane_type, op) {
(I16, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
(I16, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
(I16, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
(I16, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true),
(I32, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false),
(I32, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true),
(I32, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false),
(I32, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true),
_ => {
return Err(CodegenError::Unsupported(format!(
"Unsupported SIMD vector lane type: {:?}",
lane_type
)));
}
};

ctx.emit(Inst::VecExtend {
t,
rd,
rn,
high_half,
});
}

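The widening arm picks SXTL/UXTL (or their high-half forms) based on the opcode and the destination lane type, doubling each lane of the chosen half. A lane-wise model for i8x16 to i16x8, illustrative only:

    // Sign-extend either the low or the high eight lanes, as SXTL/SXTL2 would;
    // the Uwiden opcodes would zero-extend instead.
    fn swiden(input: [i8; 16], high_half: bool) -> [i16; 8] {
        let start = if high_half { 8 } else { 0 };
        let mut out = [0i16; 8];
        for i in 0..8 {
            out[i] = input[start + i] as i16;
        }
        out
    }

    fn main() {
        let mut v = [0i8; 16];
        v[0] = -1;
        v[8] = 127;
        assert_eq!(swiden(v, false)[0], -1);
        assert_eq!(swiden(v, true)[0], 127);
    }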
Opcode::TlsValue => unimplemented!(),
}