From ca1b76421ac3b8a37d7e2ddad096f21db959bfbf Mon Sep 17 00:00:00 2001
From: Andrew Brown <andrew.brown@intel.com>
Date: Thu, 1 Oct 2020 12:23:48 -0700
Subject: [PATCH] [machinst x64]: remove duplicate code to insert a lane

---
 cranelift/codegen/src/isa/x64/lower.rs | 114 ++++++++++---------------
 1 file changed, 43 insertions(+), 71 deletions(-)
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index aaac19eacd..4fa2000299 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -202,6 +202,48 @@ fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
     }
 }
 
+/// Emit an instruction to insert a value `src` into a lane of `dst`.
+fn emit_insert_lane<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    src: RegMem,
+    dst: Writable<Reg>,
+    lane: u8,
+    ty: Type,
+) {
+    if !ty.is_float() {
+        let (sse_op, is64) = match ty.lane_bits() {
+            8 => (SseOpcode::Pinsrb, false),
+            16 => (SseOpcode::Pinsrw, false),
+            32 => (SseOpcode::Pinsrd, false),
+            64 => (SseOpcode::Pinsrd, true),
+            _ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()),
+        };
+        ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64));
+    } else if ty == types::F32 {
+        let sse_op = SseOpcode::Insertps;
+        // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane
+        // shifted into bits 5:6).
+        let lane = 0b00_00_00_00 | lane << 4;
+        ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false));
+    } else if ty == types::F64 {
+        let sse_op = match lane {
+            // Move the lowest quadword in replacement to vector without changing
+            // the upper bits.
+            0 => SseOpcode::Movsd,
+            // Move the low 64 bits of replacement vector to the high 64 bits of the
+            // vector.
+            1 => SseOpcode::Movlhps,
+            _ => unreachable!(),
+        };
+        // Here we use the `xmm_rm_r` encoding because it correctly tells the register
+        // allocator how we are using `dst`: we are using `dst` as a `mod` whereas other
+        // encoding formats like `xmm_unary_rm_r` treat it as a `def`.
+        ctx.emit(Inst::xmm_rm_r(sse_op, src, dst));
+    } else {
+        panic!("unable to emit insertlane for type: {}", ty)
+    }
+}
+
 /// Emits an int comparison instruction.
 ///
 /// Note: make sure that there are no instructions modifying the flags between a call to this
@@ -2861,38 +2903,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             debug_assert!(lane < ty.lane_count() as u8);
 
             ctx.emit(Inst::gen_move(dst, in_vec, ty));
-            if !src_ty.is_float() {
-                let (sse_op, is64) = match ty.lane_bits() {
-                    8 => (SseOpcode::Pinsrb, false),
-                    16 => (SseOpcode::Pinsrw, false),
-                    32 => (SseOpcode::Pinsrd, false),
-                    64 => (SseOpcode::Pinsrd, true),
-                    _ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()),
-                };
-                ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64));
-            } else if src_ty == types::F32 {
-                let sse_op = SseOpcode::Insertps;
-                // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane
-                // shifted into bits 5:6).
-                let lane = 0b00_00_00_00 | lane << 4;
-                ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false));
-            } else if src_ty == types::F64 {
-                let sse_op = match lane {
-                    // Move the lowest quadword in replacement to vector without changing
-                    // the upper bits.
-                    0 => SseOpcode::Movsd,
-                    // Move the low 64 bits of replacement vector to the high 64 bits of the
-                    // vector.
-                    1 => SseOpcode::Movlhps,
-                    _ => unreachable!(),
-                };
-                // Here we use the `xmm_rm_r` encoding because it correctly tells the register
-                // allocator how we are using `dst`: we are using `dst` as a `mod` whereas other
-                // encoding formats like `xmm_unary_rm_r` treat it as a `def`.
-                ctx.emit(Inst::xmm_rm_r(sse_op, src, dst));
-            } else {
-                panic!("Unable to insertlane for type: {}", ty);
-            }
+            emit_insert_lane(ctx, src, dst, lane, ty.lane_type());
         }
 
         Opcode::Extractlane => {
@@ -2953,45 +2964,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let src = input_to_reg_mem(ctx, inputs[0]);
             let dst = get_output_reg(ctx, outputs[0]);
 
-            fn emit_insert_lane<C: LowerCtx<I = Inst>>(
-                ctx: &mut C,
-                src: RegMem,
-                dst: Writable<Reg>,
-                lane: u8,
-                ty: Type,
-            ) {
-                if !ty.is_float() {
-                    let (sse_op, is64) = match ty.lane_bits() {
-                        8 => (SseOpcode::Pinsrb, false),
-                        16 => (SseOpcode::Pinsrw, false),
-                        32 => (SseOpcode::Pinsrd, false),
-                        64 => (SseOpcode::Pinsrd, true),
-                        _ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()),
-                    };
-                    ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64));
-                } else if ty == types::F32 {
-                    let sse_op = SseOpcode::Insertps;
-                    // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane
-                    // shifted into bits 5:6).
-                    let lane = 0b00_00_00_00 | lane << 4;
-                    ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false));
-                } else if ty == types::F64 {
-                    let sse_op = match lane {
-                        // Move the lowest quadword in replacement to vector without changing
-                        // the upper bits.
-                        0 => SseOpcode::Movsd,
-                        // Move the low 64 bits of replacement vector to the high 64 bits of the
-                        // vector.
-                        1 => SseOpcode::Movlhps,
-                        _ => unreachable!(),
-                    };
-                    // Here we use the `xmm_rm_r` encoding because it correctly tells the register
-                    // allocator how we are using `dst`: we are using `dst` as a `mod` whereas other
-                    // encoding formats like `xmm_unary_rm_r` treat it as a `def`.
-                    ctx.emit(Inst::xmm_rm_r(sse_op, src, dst));
-                }
-            };
-
             // We know that splat will overwrite all of the lanes of `dst` but it takes several
             // instructions to do so. Because of the multiple instructions, there is no good way to
             // declare `dst` a `def` except with the following pseudo-instruction.