Remove uses of reg_mod from s390x (#5073)

Remove uses of reg_mod from the s390x backend. This required moving away from using r0/r1 as the result registers from a few different pseudo instructions, standardizing instead on r2/r3. That change was necessary as regalloc2 will not correctly allocate registers that aren't listed in the allocatable set, which r0/r1 are not.

Co-authored-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Co-authored-by: Chris Fallin <chris@cfallin.org>
This commit is contained in:
Trevor Elliott
2022-10-21 09:22:16 -07:00
committed by GitHub
parent 204d4c332c
commit d9753fac2b
19 changed files with 1215 additions and 758 deletions

View File

@@ -68,6 +68,29 @@ fn inst_size_test() {
assert_eq!(32, std::mem::size_of::<Inst>());
}
/// A pair of registers (high and low halves of a wide result).
/// Plain struct (not an enum) so it can be destructured in ISLE.
#[derive(Clone, Copy, Debug)]
pub struct RegPair {
    pub hi: Reg,
    pub lo: Reg,
}
/// A writable pair of registers (high and low halves of a wide result).
/// Plain struct (not an enum) so it can be destructured in ISLE.
#[derive(Clone, Copy, Debug)]
pub struct WritableRegPair {
    pub hi: Writable<Reg>,
    pub lo: Writable<Reg>,
}
impl WritableRegPair {
    /// View this writable pair as a read-only `RegPair`, discarding the
    /// `Writable` wrapper around each half.
    pub fn to_regpair(&self) -> RegPair {
        let hi = self.hi.to_reg();
        let lo = self.lo.to_reg();
        RegPair { hi, lo }
    }
}
/// Supported instruction sets
#[allow(non_camel_case_types)]
#[derive(Debug)]
@@ -342,10 +365,18 @@ impl Inst {
if let Some(imm) = UImm16Shifted::maybe_from_u64(lo) {
// 16-bit shifted immediate
insts.push(Inst::Insert64UImm16Shifted { rd, imm });
insts.push(Inst::Insert64UImm16Shifted {
rd,
ri: rd.to_reg(),
imm,
});
} else if let Some(imm) = UImm32Shifted::maybe_from_u64(lo) {
// 32-bit shifted immediate
insts.push(Inst::Insert64UImm32Shifted { rd, imm });
insts.push(Inst::Insert64UImm32Shifted {
rd,
ri: rd.to_reg(),
imm,
});
} else {
unreachable!();
}
@@ -508,31 +539,37 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
}
&Inst::SMulWide { rn, rm, .. } => {
&Inst::SMulWide { rd, rn, rm } => {
collector.reg_use(rn);
collector.reg_use(rm);
collector.reg_def(writable_gpr(0));
collector.reg_def(writable_gpr(1));
// FIXME: The pair is hard-coded as %r2/%r3 because regalloc cannot handle pairs. If
// that changes, all the hard-coded uses of %r2/%r3 can be changed.
collector.reg_fixed_def(rd.hi, gpr(2));
collector.reg_fixed_def(rd.lo, gpr(3));
}
&Inst::UMulWide { rn, .. } => {
&Inst::UMulWide { rd, ri, rn } => {
collector.reg_use(rn);
collector.reg_def(writable_gpr(0));
collector.reg_mod(writable_gpr(1));
collector.reg_fixed_def(rd.hi, gpr(2));
collector.reg_fixed_def(rd.lo, gpr(3));
collector.reg_fixed_use(ri, gpr(3));
}
&Inst::SDivMod32 { rn, .. } | &Inst::SDivMod64 { rn, .. } => {
&Inst::SDivMod32 { rd, ri, rn } | &Inst::SDivMod64 { rd, ri, rn } => {
collector.reg_use(rn);
collector.reg_def(writable_gpr(0));
collector.reg_mod(writable_gpr(1));
collector.reg_fixed_def(rd.hi, gpr(2));
collector.reg_fixed_def(rd.lo, gpr(3));
collector.reg_fixed_use(ri, gpr(3));
}
&Inst::UDivMod32 { rn, .. } | &Inst::UDivMod64 { rn, .. } => {
&Inst::UDivMod32 { rd, ri, rn } | &Inst::UDivMod64 { rd, ri, rn } => {
collector.reg_use(rn);
collector.reg_mod(writable_gpr(0));
collector.reg_mod(writable_gpr(1));
collector.reg_fixed_def(rd.hi, gpr(2));
collector.reg_fixed_def(rd.lo, gpr(3));
collector.reg_fixed_use(ri.hi, gpr(2));
collector.reg_fixed_use(ri.lo, gpr(3));
}
&Inst::Flogr { rn, .. } => {
&Inst::Flogr { rd, rn } => {
collector.reg_use(rn);
collector.reg_def(writable_gpr(0));
collector.reg_def(writable_gpr(1));
collector.reg_fixed_def(rd.hi, gpr(2));
collector.reg_fixed_def(rd.lo, gpr(3));
}
&Inst::ShiftRR {
rd, rn, shift_reg, ..
@@ -543,8 +580,9 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_use(shift_reg);
}
}
&Inst::RxSBG { rd, rn, .. } => {
collector.reg_mod(rd);
&Inst::RxSBG { rd, ri, rn, .. } => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
collector.reg_use(rn);
}
&Inst::RxSBGTest { rd, rn, .. } => {
@@ -590,12 +628,21 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
memarg_operands(mem, collector);
}
&Inst::AtomicCas32 {
rd, rn, ref mem, ..
rd,
ri,
rn,
ref mem,
..
}
| &Inst::AtomicCas64 {
rd, rn, ref mem, ..
rd,
ri,
rn,
ref mem,
..
} => {
collector.reg_mod(rd);
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
collector.reg_use(rn);
memarg_operands(mem, collector);
}
@@ -681,28 +728,34 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
| &Inst::Mov64UImm32Shifted { rd, .. } => {
collector.reg_def(rd);
}
&Inst::CMov32 { rd, rm, .. } | &Inst::CMov64 { rd, rm, .. } => {
collector.reg_mod(rd);
&Inst::CMov32 { rd, ri, rm, .. } | &Inst::CMov64 { rd, ri, rm, .. } => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
collector.reg_use(rm);
}
&Inst::CMov32SImm16 { rd, .. } | &Inst::CMov64SImm16 { rd, .. } => {
collector.reg_mod(rd);
&Inst::CMov32SImm16 { rd, ri, .. } | &Inst::CMov64SImm16 { rd, ri, .. } => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
}
&Inst::Insert64UImm16Shifted { rd, .. } | &Inst::Insert64UImm32Shifted { rd, .. } => {
collector.reg_mod(rd);
&Inst::Insert64UImm16Shifted { rd, ri, .. }
| &Inst::Insert64UImm32Shifted { rd, ri, .. } => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
}
&Inst::LoadAR { rd, .. } => {
collector.reg_def(rd);
}
&Inst::InsertAR { rd, .. } => {
collector.reg_mod(rd);
&Inst::InsertAR { rd, ri, .. } => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
}
&Inst::FpuMove32 { rd, rn } | &Inst::FpuMove64 { rd, rn } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::FpuCMov32 { rd, rm, .. } | &Inst::FpuCMov64 { rd, rm, .. } => {
collector.reg_mod(rd);
&Inst::FpuCMov32 { rd, ri, rm, .. } | &Inst::FpuCMov64 { rd, ri, rm, .. } => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
collector.reg_use(rm);
}
&Inst::FpuRR { rd, rn, .. } => {
@@ -858,8 +911,9 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::VecCMov { rd, rm, .. } => {
collector.reg_mod(rd);
&Inst::VecCMov { rd, ri, rm, .. } => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
collector.reg_use(rm);
}
&Inst::MovToVec128 { rd, rn, rm } => {
@@ -880,8 +934,11 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
&Inst::VecImmReplicate { rd, .. } => {
collector.reg_def(rd);
}
&Inst::VecLoadLane { rd, ref mem, .. } => {
collector.reg_mod(rd);
&Inst::VecLoadLane {
rd, ri, ref mem, ..
} => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
memarg_operands(mem, collector);
}
&Inst::VecLoadLaneUndef { rd, ref mem, .. } => {
@@ -900,14 +957,22 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_use(rd);
memarg_operands(mem, collector);
}
&Inst::VecLoadLaneRev { rd, ref mem, .. } => {
collector.reg_mod(rd);
&Inst::VecLoadLaneRev {
rd, ri, ref mem, ..
} => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
memarg_operands(mem, collector);
}
&Inst::VecInsertLane {
rd, rn, lane_reg, ..
rd,
ri,
rn,
lane_reg,
..
} => {
collector.reg_mod(rd);
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
collector.reg_use(rn);
collector.reg_use(lane_reg);
}
@@ -925,8 +990,9 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_use(rn);
collector.reg_use(lane_reg);
}
&Inst::VecInsertLaneImm { rd, .. } => {
collector.reg_def(rd);
&Inst::VecInsertLaneImm { rd, ri, .. } => {
collector.reg_reuse_def(rd, 1);
collector.reg_use(ri);
}
&Inst::VecReplicateLane { rd, rn, .. } => {
collector.reg_def(rd);
@@ -1470,54 +1536,47 @@ impl Inst {
let rd = pretty_print_reg_mod(rd, ri, allocs);
format!("{} {}, {}", op, rd, imm.bits)
}
&Inst::SMulWide { rn, rm } => {
&Inst::SMulWide { rd, rn, rm } => {
let op = "mgrk";
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
let rd = pretty_print_reg(gpr(0), allocs);
let _r1 = allocs.next(gpr(1));
let rd = pretty_print_regpair(rd.to_regpair(), allocs);
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::UMulWide { rn } => {
&Inst::UMulWide { rd, ri, rn } => {
let op = "mlgr";
let rn = pretty_print_reg(rn, allocs);
let rd = pretty_print_reg(gpr(0), allocs);
let _r1 = allocs.next(gpr(1));
let rd = pretty_print_regpair_mod_lo(rd, ri, allocs);
format!("{} {}, {}", op, rd, rn)
}
&Inst::SDivMod32 { rn, .. } => {
&Inst::SDivMod32 { rd, ri, rn } => {
let op = "dsgfr";
let rn = pretty_print_reg(rn, allocs);
let rd = pretty_print_reg(gpr(0), allocs);
let _r1 = allocs.next(gpr(1));
let rd = pretty_print_regpair_mod_lo(rd, ri, allocs);
format!("{} {}, {}", op, rd, rn)
}
&Inst::SDivMod64 { rn, .. } => {
&Inst::SDivMod64 { rd, ri, rn } => {
let op = "dsgr";
let rn = pretty_print_reg(rn, allocs);
let rd = pretty_print_reg(gpr(0), allocs);
let _r1 = allocs.next(gpr(1));
let rd = pretty_print_regpair_mod_lo(rd, ri, allocs);
format!("{} {}, {}", op, rd, rn)
}
&Inst::UDivMod32 { rn, .. } => {
&Inst::UDivMod32 { rd, ri, rn } => {
let op = "dlr";
let rn = pretty_print_reg(rn, allocs);
let rd = pretty_print_reg(gpr(0), allocs);
let _r1 = allocs.next(gpr(1));
let rd = pretty_print_regpair_mod(rd, ri, allocs);
format!("{} {}, {}", op, rd, rn)
}
&Inst::UDivMod64 { rn, .. } => {
&Inst::UDivMod64 { rd, ri, rn } => {
let op = "dlgr";
let rn = pretty_print_reg(rn, allocs);
let rd = pretty_print_reg(gpr(0), allocs);
let _r1 = allocs.next(gpr(1));
let rd = pretty_print_regpair_mod(rd, ri, allocs);
format!("{} {}, {}", op, rd, rn)
}
&Inst::Flogr { rn } => {
&Inst::Flogr { rd, rn } => {
let op = "flogr";
let rn = pretty_print_reg(rn, allocs);
let rd = pretty_print_reg(gpr(0), allocs);
let _r1 = allocs.next(gpr(1));
let rd = pretty_print_regpair(rd.to_regpair(), allocs);
format!("{} {}, {}", op, rd, rn)
}
&Inst::ShiftRR {
@@ -1549,6 +1608,7 @@ impl Inst {
&Inst::RxSBG {
op,
rd,
ri,
rn,
start_bit,
end_bit,
@@ -1560,7 +1620,7 @@ impl Inst {
RxSBGOp::Or => "rosbg",
RxSBGOp::Xor => "rxsbg",
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rd = pretty_print_reg_mod(rd, ri, allocs);
let rn = pretty_print_reg(rn, allocs);
format!(
"{} {}, {}, {}, {}, {}",
@@ -1769,14 +1829,25 @@ impl Inst {
let mem = mem.pretty_print_default();
format!("{}{} {}, {}, {}", mem_str, op, rd, rn, mem)
}
&Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => {
&Inst::AtomicCas32 {
rd,
ri,
rn,
ref mem,
}
| &Inst::AtomicCas64 {
rd,
ri,
rn,
ref mem,
} => {
let (opcode_rs, opcode_rsy) = match self {
&Inst::AtomicCas32 { .. } => (Some("cs"), Some("csy")),
&Inst::AtomicCas64 { .. } => (None, Some("csg")),
_ => unreachable!(),
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rd = pretty_print_reg_mod(rd, ri, allocs);
let rn = pretty_print_reg(rn, allocs);
let mem = mem.with_allocs(allocs);
let (mem_str, mem) = mem_finalize_for_show(
@@ -2047,8 +2118,8 @@ impl Inst {
};
format!("{} {}, {}", op, rd, imm.bits)
}
&Inst::Insert64UImm16Shifted { rd, ref imm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
&Inst::Insert64UImm16Shifted { rd, ri, ref imm } => {
let rd = pretty_print_reg_mod(rd, ri, allocs);
let op = match imm.shift {
0 => "iill",
1 => "iilh",
@@ -2058,8 +2129,8 @@ impl Inst {
};
format!("{} {}, {}", op, rd, imm.bits)
}
&Inst::Insert64UImm32Shifted { rd, ref imm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
&Inst::Insert64UImm32Shifted { rd, ri, ref imm } => {
let rd = pretty_print_reg_mod(rd, ri, allocs);
let op = match imm.shift {
0 => "iilf",
1 => "iihf",
@@ -2067,29 +2138,43 @@ impl Inst {
};
format!("{} {}, {}", op, rd, imm.bits)
}
&Inst::LoadAR { rd, ar } | &Inst::InsertAR { rd, ar } => {
&Inst::LoadAR { rd, ar } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
format!("ear {}, %a{}", rd, ar)
}
&Inst::CMov32 { rd, cond, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
&Inst::InsertAR { rd, ri, ar } => {
let rd = pretty_print_reg_mod(rd, ri, allocs);
format!("ear {}, %a{}", rd, ar)
}
&Inst::CMov32 { rd, cond, ri, rm } => {
let rd = pretty_print_reg_mod(rd, ri, allocs);
let rm = pretty_print_reg(rm, allocs);
let cond = cond.pretty_print_default();
format!("locr{} {}, {}", cond, rd, rm)
}
&Inst::CMov64 { rd, cond, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
&Inst::CMov64 { rd, cond, ri, rm } => {
let rd = pretty_print_reg_mod(rd, ri, allocs);
let rm = pretty_print_reg(rm, allocs);
let cond = cond.pretty_print_default();
format!("locgr{} {}, {}", cond, rd, rm)
}
&Inst::CMov32SImm16 { rd, cond, ref imm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
&Inst::CMov32SImm16 {
rd,
cond,
ri,
ref imm,
} => {
let rd = pretty_print_reg_mod(rd, ri, allocs);
let cond = cond.pretty_print_default();
format!("lochi{} {}, {}", cond, rd, imm)
}
&Inst::CMov64SImm16 { rd, cond, ref imm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
&Inst::CMov64SImm16 {
rd,
cond,
ri,
ref imm,
} => {
let rd = pretty_print_reg_mod(rd, ri, allocs);
let cond = cond.pretty_print_default();
format!("locghi{} {}, {}", cond, rd, imm)
}
@@ -2111,8 +2196,9 @@ impl Inst {
format!("vlr {}, {}", rd, rn)
}
}
&Inst::FpuCMov32 { rd, cond, rm } => {
&Inst::FpuCMov32 { rd, cond, ri, rm } => {
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let _ri = allocs.next(ri);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rd_fpr.is_some() && rm_fpr.is_some() {
let cond = cond.invert().pretty_print_default();
@@ -2122,8 +2208,9 @@ impl Inst {
format!("j{} 10 ; vlr {}, {}", cond, rd, rm)
}
}
&Inst::FpuCMov64 { rd, cond, rm } => {
&Inst::FpuCMov64 { rd, cond, ri, rm } => {
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let _ri = allocs.next(ri);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rd_fpr.is_some() && rm_fpr.is_some() {
let cond = cond.invert().pretty_print_default();
@@ -2753,8 +2840,8 @@ impl Inst {
let rn = pretty_print_reg(rn, allocs);
format!("vlr {}, {}", rd, rn)
}
&Inst::VecCMov { rd, cond, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
&Inst::VecCMov { rd, cond, ri, rm } => {
let rd = pretty_print_reg_mod(rd, ri, allocs);
let rm = pretty_print_reg(rm, allocs);
let cond = cond.invert().pretty_print_default();
format!("j{} 10 ; vlr {}, {}", cond, rd, rm)
@@ -2830,16 +2917,46 @@ impl Inst {
&Inst::VecLoadLane {
size,
rd,
ri,
ref mem,
lane_imm,
}
| &Inst::VecLoadLaneRev {
size,
rd,
ri,
ref mem,
lane_imm,
} => {
let opcode_vrx = match (self, size) {
(&Inst::VecLoadLane { .. }, 8) => "vleb",
(&Inst::VecLoadLane { .. }, 16) => "vleh",
(&Inst::VecLoadLane { .. }, 32) => "vlef",
(&Inst::VecLoadLane { .. }, 64) => "vleg",
(&Inst::VecLoadLaneRev { .. }, 16) => "vlebrh",
(&Inst::VecLoadLaneRev { .. }, 32) => "vlebrf",
(&Inst::VecLoadLaneRev { .. }, 64) => "vlebrg",
_ => unreachable!(),
};
let (rd, _) = pretty_print_fpr(rd.to_reg(), allocs);
let _ri = allocs.next(ri);
let mem = mem.with_allocs(allocs);
let (mem_str, mem) = mem_finalize_for_show(
&mem,
state,
MemInstType {
have_d12: true,
have_d20: false,
have_pcrel: false,
have_unaligned_pcrel: false,
have_index: true,
},
);
let mem = mem.pretty_print_default();
format!("{}{} {}, {}, {}", mem_str, opcode_vrx, rd, mem, lane_imm)
}
| &Inst::VecLoadLaneUndef {
&Inst::VecLoadLaneUndef {
size,
rd,
ref mem,
@@ -2852,13 +2969,6 @@ impl Inst {
lane_imm,
} => {
let (opcode_vrx, opcode_rx, opcode_rxy) = match (self, size) {
(&Inst::VecLoadLane { .. }, 8) => ("vleb", None, None),
(&Inst::VecLoadLane { .. }, 16) => ("vleh", None, None),
(&Inst::VecLoadLane { .. }, 32) => ("vlef", None, None),
(&Inst::VecLoadLane { .. }, 64) => ("vleg", None, None),
(&Inst::VecLoadLaneRev { .. }, 16) => ("vlebrh", None, None),
(&Inst::VecLoadLaneRev { .. }, 32) => ("vlebrf", None, None),
(&Inst::VecLoadLaneRev { .. }, 64) => ("vlebrg", None, None),
(&Inst::VecLoadLaneUndef { .. }, 8) => ("vleb", None, None),
(&Inst::VecLoadLaneUndef { .. }, 16) => ("vleh", None, None),
(&Inst::VecLoadLaneUndef { .. }, 32) => ("vlef", Some("le"), Some("ley")),
@@ -2969,6 +3079,7 @@ impl Inst {
&Inst::VecInsertLane {
size,
rd,
ri,
rn,
lane_imm,
lane_reg,
@@ -2980,7 +3091,7 @@ impl Inst {
64 => "vlvgg",
_ => unreachable!(),
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rd = pretty_print_reg_mod(rd, ri, allocs);
let rn = pretty_print_reg(rn, allocs);
let lane_reg = if lane_reg != zero_reg() {
format!("({})", pretty_print_reg(lane_reg, allocs))
@@ -3048,6 +3159,7 @@ impl Inst {
&Inst::VecInsertLaneImm {
size,
rd,
ri,
imm,
lane_imm,
} => {
@@ -3058,7 +3170,7 @@ impl Inst {
64 => "vleig",
_ => unreachable!(),
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rd = pretty_print_reg_mod(rd, ri, allocs);
format!("{} {}, {}, {}", op, rd, imm, lane_imm)
}
&Inst::VecReplicateLane {