riscv64: Don't reuse registers when loading constants (#5376)

Rework the constant loading functions in the riscv64 backend to generate fresh temporaries instead of reusing the destination register.
2022-12-05 16:51:52 -08:00
parent 28cfa57533
commit 7d28d586da
12 changed files with 164 additions and 136 deletions
--- a/cranelift/codegen/src/isa/riscv64/abi.rs
+++ b/cranelift/codegen/src/isa/riscv64/abi.rs
@@ -256,6 +256,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
            insts.extend(Inst::load_constant_u32(
                writable_spilltmp_reg2(),
                imm as u64,
+                &mut |_| writable_spilltmp_reg2(),
            ));
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
@@ -361,7 +362,11 @@ impl ABIMachineSpec for Riscv64MachineDeps {
    }

    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
-        insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64));
+        insts.extend(Inst::load_constant_u32(
+            writable_a0(),
+            frame_size as u64,
+            &mut |_| writable_a0(),
+        ));
        insts.push(Inst::Call {
            info: Box::new(CallInfo {
                dest: ExternalName::LibCall(LibCall::Probestack),
@@ -537,7 +542,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
        let arg1 = Writable::from_reg(x_reg(11));
        let arg2 = Writable::from_reg(x_reg(12));
        let tmp = alloc_tmp(Self::word_type());
-        insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter());
+        insts.extend(Inst::load_constant_u64(tmp, size as u64, &mut alloc_tmp).into_iter());
        insts.push(Inst::Call {
            info: Box::new(CallInfo {
                dest: ExternalName::LibCall(LibCall::Memcpy),
--- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
@@ -140,7 +140,7 @@ impl MachInstEmitState<Inst> for EmitState {
 impl Inst {
    /// construct a "imm - rs".
    pub(crate) fn construct_imm_sub_rs(rd: Writable<Reg>, imm: u64, rs: Reg) -> SmallInstVec<Inst> {
-        let mut insts = Inst::load_constant_u64(rd, imm);
+        let mut insts = Inst::load_constant_u64(rd, imm, &mut |_| rd);
        insts.push(Inst::AluRRR {
            alu_op: AluOPRRR::Sub,
            rd,
@@ -930,7 +930,7 @@ impl MachInstEmit for Inst {
                    .emit(&[], sink, emit_info, state);
                } else {
                    let tmp = writable_spilltmp_reg();
-                    let mut insts = Inst::load_constant_u64(tmp, amount as u64);
+                    let mut insts = Inst::load_constant_u64(tmp, amount as u64, &mut |_| tmp);
                    insts.push(Inst::AluRRR {
                        alu_op: AluOPRRR::Add,
                        rd: writable_stack_reg(),
@@ -1111,9 +1111,11 @@ impl MachInstEmit for Inst {
            } => {
                let index = allocs.next(index);
                // load
-                Inst::load_constant_u32(writable_spilltmp_reg(), targets_len as u64)
-                    .iter()
-                    .for_each(|i| i.emit(&[], sink, emit_info, state));
+                Inst::load_constant_u32(writable_spilltmp_reg(), targets_len as u64, &mut |_| {
+                    writable_spilltmp_reg()
+                })
+                .iter()
+                .for_each(|i| i.emit(&[], sink, emit_info, state));
                Inst::CondBr {
                    taken: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 3),
                    not_taken: BranchTarget::zero(),
@@ -1254,7 +1256,7 @@ impl MachInstEmit for Inst {
                if let Some(offset) = Imm12::maybe_from_u64(offset as u64) {
                    Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Addi,
-                        rd: rd,
+                        rd,
                        rs: base,
                        imm12: offset,
                    }
@@ -1835,17 +1837,13 @@ impl MachInstEmit for Inst {
                    let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8);
                    let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8);
                    if in_type == F32 {
-                        Inst::load_fp_constant32(
-                            tmp,
-                            f32_bits(f32_bounds.0),
-                            writable_spilltmp_reg(),
-                        )
+                        Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| {
+                            writable_spilltmp_reg()
+                        })
                    } else {
-                        Inst::load_fp_constant64(
-                            tmp,
-                            f64_bits(f64_bounds.0),
-                            writable_spilltmp_reg(),
-                        )
+                        Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| {
+                            writable_spilltmp_reg()
+                        })
                    }
                    .iter()
                    .for_each(|i| i.emit(&[], sink, emit_info, state));
@@ -1859,17 +1857,13 @@ impl MachInstEmit for Inst {
                    }
                    .emit(&[], sink, emit_info, state);
                    if in_type == F32 {
-                        Inst::load_fp_constant32(
-                            tmp,
-                            f32_bits(f32_bounds.1),
-                            writable_spilltmp_reg(),
-                        )
+                        Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| {
+                            writable_spilltmp_reg()
+                        })
                    } else {
-                        Inst::load_fp_constant64(
-                            tmp,
-                            f64_bits(f64_bounds.1),
-                            writable_spilltmp_reg(),
-                        )
+                        Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| {
+                            writable_spilltmp_reg()
+                        })
                    }
                    .iter()
                    .for_each(|i| i.emit(&[], sink, emit_info, state));
@@ -2160,17 +2154,13 @@ impl MachInstEmit for Inst {
                }
                // load max value need to round.
                if ty == F32 {
-                    Inst::load_fp_constant32(
-                        f_tmp,
-                        max_value_need_round(ty) as u32,
-                        writable_spilltmp_reg(),
-                    )
+                    Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| {
+                        writable_spilltmp_reg()
+                    })
                } else {
-                    Inst::load_fp_constant64(
-                        f_tmp,
-                        max_value_need_round(ty),
-                        writable_spilltmp_reg(),
-                    )
+                    Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| {
+                        writable_spilltmp_reg()
+                    })
                }
                .into_iter()
                .for_each(|i| i.emit(&[], sink, emit_info, state));
@@ -2843,10 +2833,14 @@ impl MachInstEmit for Inst {
                tmp: guard_size_tmp,
            } => {
                let step = writable_spilltmp_reg();
-                Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
-                    .iter()
-                    .for_each(|i| i.emit(&[], sink, emit_info, state));
-                Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
+                Inst::load_constant_u64(
+                    step,
+                    (guard_size as u64) * (probe_count as u64),
+                    &mut |_| step,
+                )
+                .iter()
+                .for_each(|i| i.emit(&[], sink, emit_info, state));
+                Inst::load_constant_u64(guard_size_tmp, guard_size as u64, &mut |_| guard_size_tmp)
                    .iter()
                    .for_each(|i| i.emit(&[], sink, emit_info, state));

--- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -10,7 +10,6 @@ use crate::ir::types::{F32, F64, FFLAGS, I128, I16, I32, I64, I8, IFLAGS, R32, R

 pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
 use crate::isa::CallConv;
-use crate::machinst::isle::WritableReg;
 use crate::machinst::*;
 use crate::{settings, CodegenError, CodegenResult};

@@ -194,43 +193,57 @@ impl Inst {
    pub(crate) fn load_imm12(rd: Writable<Reg>, imm: Imm12) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
-            rd: rd,
+            rd,
            rs: zero_reg(),
            imm12: imm,
        }
    }

    /// Immediates can be loaded using lui and addi instructions.
-    fn load_const_imm(rd: Writable<Reg>, value: u64) -> Option<SmallInstVec<Inst>> {
+    fn load_const_imm<F: FnMut(Type) -> Writable<Reg>>(
+        rd: Writable<Reg>,
+        value: u64,
+        alloc_tmp: &mut F,
+    ) -> Option<SmallInstVec<Inst>> {
        Inst::generate_imm(value, |imm20, imm12| {
            let mut insts = SmallVec::new();
-            imm20.map(|x| insts.push(Inst::Lui { rd, imm: x }));
-            imm12.map(|x| {
-                let imm20_is_none = imm20.is_none();
-                let rs = if imm20_is_none {
-                    zero_reg()
-                } else {
-                    rd.to_reg()
-                };
+
+            let rs = if let Some(imm) = imm20 {
+                let rd = if imm12.is_some() { alloc_tmp(I64) } else { rd };
+                insts.push(Inst::Lui { rd, imm });
+                rd.to_reg()
+            } else {
+                zero_reg()
+            };
+
+            if let Some(imm12) = imm12 {
                insts.push(Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd,
                    rs,
-                    imm12: x,
+                    imm12,
                })
-            });
+            }

            insts
        })
    }

-    pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
-        let insts = Inst::load_const_imm(rd, value);
+    pub(crate) fn load_constant_u32<F: FnMut(Type) -> Writable<Reg>>(
+        rd: Writable<Reg>,
+        value: u64,
+        alloc_tmp: &mut F,
+    ) -> SmallInstVec<Inst> {
+        let insts = Inst::load_const_imm(rd, value, alloc_tmp);
        insts.unwrap_or(LoadConstant::U32(value as u32).load_constant(rd))
    }

-    pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
-        let insts = Inst::load_const_imm(rd, value);
+    pub fn load_constant_u64<F: FnMut(Type) -> Writable<Reg>>(
+        rd: Writable<Reg>,
+        value: u64,
+        alloc_tmp: &mut F,
+    ) -> SmallInstVec<Inst> {
+        let insts = Inst::load_const_imm(rd, value, alloc_tmp);
        insts.unwrap_or(LoadConstant::U64(value).load_constant(rd))
    }

@@ -255,13 +268,18 @@ impl Inst {
    }

    /// Create instructions that load a 32-bit floating-point constant.
-    pub fn load_fp_constant32(
+    pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        const_data: u32,
-        tmp: Writable<Reg>,
+        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        let mut insts = SmallVec::new();
-        insts.extend(Self::load_constant_u32(tmp, const_data as u64));
+        let tmp = alloc_tmp(I64);
+        insts.extend(Self::load_constant_u32(
+            tmp,
+            const_data as u64,
+            &mut alloc_tmp,
+        ));
        insts.push(Inst::FpuRR {
            frm: None,
            alu_op: FpuOPRR::move_x_to_f_op(F32),
@@ -272,13 +290,14 @@ impl Inst {
    }

    /// Create instructions that load a 64-bit floating-point constant.
-    pub fn load_fp_constant64(
+    pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
        rd: Writable<Reg>,
        const_data: u64,
-        tmp: WritableReg,
+        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        let mut insts = SmallInstVec::new();
-        insts.extend(Self::load_constant_u64(tmp, const_data));
+        let tmp = alloc_tmp(I64);
+        insts.extend(Self::load_constant_u64(tmp, const_data, &mut alloc_tmp));
        insts.push(Inst::FpuRR {
            frm: None,
            alu_op: FpuOPRR::move_x_to_f_op(F64),
@@ -699,22 +718,31 @@ impl MachInst for Inst {
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
        if (ty.bits() <= 64 && ty.is_int()) || ty == R32 || ty == R64 {
-            return Inst::load_constant_u64(to_regs.only_reg().unwrap(), value as u64);
+            return Inst::load_constant_u64(
+                to_regs.only_reg().unwrap(),
+                value as u64,
+                &mut alloc_tmp,
+            );
        };
        match ty {
            F32 => {
-                Inst::load_fp_constant32(to_regs.only_reg().unwrap(), value as u32, alloc_tmp(I64))
+                Inst::load_fp_constant32(to_regs.only_reg().unwrap(), value as u32, &mut alloc_tmp)
            }
            F64 => {
-                Inst::load_fp_constant64(to_regs.only_reg().unwrap(), value as u64, alloc_tmp(I64))
+                Inst::load_fp_constant64(to_regs.only_reg().unwrap(), value as u64, &mut alloc_tmp)
            }
            I128 => {
                let mut insts = SmallInstVec::new();
                insts.extend(Inst::load_constant_u64(
                    to_regs.regs()[0],
                    (value >> 64) as u64,
+                    &mut alloc_tmp,
+                ));
+                insts.extend(Inst::load_constant_u64(
+                    to_regs.regs()[1],
+                    value as u64,
+                    &mut alloc_tmp,
                ));
-                insts.extend(Inst::load_constant_u64(to_regs.regs()[1], value as u64));
                return insts;
            }
            _ => unreachable!("vector type not implemented now."),
--- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
@@ -198,7 +198,8 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>

    fn imm(&mut self, ty: Type, val: u64) -> Reg {
        let tmp = self.temp_writable_reg(ty);
-        self.emit_list(&MInst::load_constant_u64(tmp, val));
+        let insts = &MInst::load_constant_u64(tmp, val, &mut |ty| self.temp_writable_reg(ty));
+        self.emit_list(insts);
        tmp.to_reg()
    }
    #[inline]
@@ -249,7 +250,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
    }
    fn load_u64_constant(&mut self, val: u64) -> Reg {
        let rd = self.temp_writable_reg(I64);
-        MInst::load_constant_u64(rd, val)
+        MInst::load_constant_u64(rd, val, &mut |ty| self.temp_writable_reg(ty))
            .iter()
            .for_each(|i| self.emit(i));
        rd.to_reg()
@@ -439,7 +440,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
        self.emit(&MInst::BrTableCheck {
            index,
            targets_len: targets.len() as i32,
-            default_: default_,
+            default_,
        });
        self.emit(&MInst::BrTable {
            index,
--- a/cranelift/codegen/src/isa/riscv64/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/mod.rs
@@ -244,13 +244,13 @@ mod test {
        // on it to update:
        // > riscv64-linux-gnu-objdump -b binary -D <file> -m riscv
        //
-        // 0:   000013b7                lui     t2,0x1
-        // 4:   23438393                addi    t2,t2,564 # 0x1234
-        // 8:   0075053b                .4byte  0x75053b
+        // 0:   000015b7                lui     a1,0x1
+        // 4:   23458593                addi    a1,a1,564 # 0x1234
+        // 8:   00b5053b                .4byte  0xb5053b
        // c:   00008067                ret

        let golden = vec![
-            183, 19, 0, 0, 147, 131, 67, 35, 59, 5, 117, 0, 103, 128, 0, 0,
+            183, 21, 0, 0, 147, 133, 69, 35, 59, 5, 181, 0, 103, 128, 0, 0,
        ];
        assert_eq!(code, &golden[..]);
    }
--- a/cranelift/filetests/filetests/isa/riscv64/amodes.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/amodes.clif
@@ -93,12 +93,12 @@ block0(v0: i64, v1: i64, v2: i64):
 }

 ; block0:
-;   add a5,a0,a1
-;   add a5,a5,a2
-;   lui a4,1
-;   addi a4,a4,4
-;   add a7,a5,a4
-;   lw a0,0(a7)
+;   add a6,a0,a1
+;   add a6,a6,a2
+;   lui a5,1
+;   addi a5,a5,4
+;   add t3,a6,a5
+;   lw a0,0(t3)
 ;   ret

 function %f10() -> i32 {
@@ -151,8 +151,8 @@ block0(v0: i64):
 ; block0:
 ;   lui a1,244141
 ;   addi a1,a1,2560
-;   add a3,a0,a1
-;   lw a0,0(a3)
+;   add a4,a0,a1
+;   lw a0,0(a4)
 ;   ret

 function %f14(i32) -> i32 {
@@ -192,10 +192,10 @@ block0(v0: i64, v1: i64, v2: i64):
 }

 ; block0:
-;   lui a2,1048575
-;   addi a2,a2,4094
-;   uext.w a5,a2
-;   lh a0,0(a5)
+;   lui a3,1048575
+;   addi a3,a3,4094
+;   uext.w a6,a3
+;   lh a0,0(a6)
 ;   ret

 function %f19(i64, i64, i64) -> i32 {
@@ -207,10 +207,10 @@ block0(v0: i64, v1: i64, v2: i64):
 }

 ; block0:
-;   lui a2,1
-;   addi a2,a2,2
-;   uext.w a5,a2
-;   lh a0,0(a5)
+;   lui a3,1
+;   addi a3,a3,2
+;   uext.w a6,a3
+;   lh a0,0(a6)
 ;   ret

 function %f20(i64, i64, i64) -> i32 {
@@ -222,10 +222,10 @@ block0(v0: i64, v1: i64, v2: i64):
 }

 ; block0:
-;   lui a2,1048575
-;   addi a2,a2,4094
-;   sext.w a5,a2
-;   lh a0,0(a5)
+;   lui a3,1048575
+;   addi a3,a3,4094
+;   sext.w a6,a3
+;   lh a0,0(a6)
 ;   ret

 function %f21(i64, i64, i64) -> i32 {
@@ -237,10 +237,10 @@ block0(v0: i64, v1: i64, v2: i64):
 }

 ; block0:
-;   lui a2,1
-;   addi a2,a2,2
-;   sext.w a5,a2
-;   lh a0,0(a5)
+;   lui a3,1
+;   addi a3,a3,2
+;   sext.w a6,a3
+;   lh a0,0(a6)
 ;   ret

 function %i128(i64) -> i128 {
--- a/cranelift/filetests/filetests/isa/riscv64/atomic_store.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/atomic_store.clif
@@ -33,9 +33,9 @@ block0(v0: i64):
 }

 ; block0:
-;   lui t2,3
-;   addi t2,t2,57
-;   atomic_store.i64 t2,(a0)
+;   lui a1,3
+;   addi a1,a1,57
+;   atomic_store.i64 a1,(a0)
 ;   ret

 function %atomic_store_i32(i32, i64) {
@@ -69,8 +69,8 @@ block0(v0: i64):
 }

 ; block0:
-;   lui t2,3
-;   addi t2,t2,57
-;   atomic_store.i32 t2,(a0)
+;   lui a1,3
+;   addi a1,a1,57
+;   atomic_store.i32 a1,(a0)
 ;   ret

--- a/cranelift/filetests/filetests/isa/riscv64/condbr.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/condbr.clif
@@ -452,10 +452,10 @@ block1:
 }

 ; block0:
-;   lui t2,16
-;   addi t2,t2,4095
-;   and a2,a0,t2
-;   beq a2,zero,taken(label1),not_taken(label2)
+;   lui a1,16
+;   addi a1,a1,4095
+;   and a3,a0,a1
+;   beq a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -474,10 +474,10 @@ block1:
 }

 ; block0:
-;   lui t2,16
-;   addi t2,t2,4095
-;   and a2,a0,t2
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   lui a1,16
+;   addi a1,a1,4095
+;   and a3,a0,a1
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
--- a/cranelift/filetests/filetests/isa/riscv64/constants.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/constants.clif
@@ -39,8 +39,8 @@ block0:
 }

 ; block0:
-;   lui a0,16
-;   addi a0,a0,4095
+;   lui t1,16
+;   addi a0,t1,4095
 ;   ret

 function %f() -> i64 {
--- a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif
@@ -74,13 +74,13 @@ block0(v0: i64, v1: i32):
 }

 ; block0:
-;   uext.w a6,a1
-;   add a7,a0,a6
-;   addi a7,a7,16
-;   lui a4,16
-;   addi a4,a4,4072
-;   ugt t3,a6,a4##ty=i64
-;   li t4,0
-;   selectif_spectre_guard a0,t4,a7##test=t3
+;   uext.w a7,a1
+;   add t3,a0,a7
+;   addi t3,t3,16
+;   lui a5,16
+;   addi a5,a5,4072
+;   ugt t4,a7,a5##ty=i64
+;   li t0,0
+;   selectif_spectre_guard a0,t0,t3##test=t4
 ;   ret

--- a/cranelift/filetests/filetests/isa/riscv64/i128-bmask.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/i128-bmask.clif
@@ -95,11 +95,11 @@ block0(v0: i16):
 }

 ; block0:
-;   lui t2,16
-;   addi t2,t2,4095
-;   and a2,a0,t2
-;   li a4,-1
-;   select_reg a1,zero,a4##condition=(zero eq a2)
+;   lui a1,16
+;   addi a1,a1,4095
+;   and a3,a0,a1
+;   li a5,-1
+;   select_reg a1,zero,a5##condition=(zero eq a3)
 ;   mv a0,a1
 ;   ret

--- a/cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif
@@ -11,12 +11,12 @@ block0:
 }

 ; block0:
-;   lui t0,14
-;   addi t0,t0,3532
-;   lui a0,14
-;   addi a0,a0,3532
-;   uext.h a3,t0
-;   uext.h a5,a0
-;   ne a0,a3,a5##ty=i16
+;   lui t1,14
+;   addi t1,t1,3532
+;   lui a2,14
+;   addi a2,a2,3532
+;   uext.h a5,t1
+;   uext.h a7,a2
+;   ne a0,a5,a7##ty=i16
 ;   ret