riscv64: Don't reuse registers when loading constants (#5376)

Rework the constant loading functions in the riscv64 backend to generate fresh temporaries instead of reusing the destination register.
This commit is contained in:
Trevor Elliott
2022-12-05 16:51:52 -08:00
committed by GitHub
parent 28cfa57533
commit 7d28d586da
12 changed files with 164 additions and 136 deletions

View File

@@ -256,6 +256,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
insts.extend(Inst::load_constant_u32(
writable_spilltmp_reg2(),
imm as u64,
&mut |_| writable_spilltmp_reg2(),
));
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
@@ -361,7 +362,11 @@ impl ABIMachineSpec for Riscv64MachineDeps {
}
fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64));
insts.extend(Inst::load_constant_u32(
writable_a0(),
frame_size as u64,
&mut |_| writable_a0(),
));
insts.push(Inst::Call {
info: Box::new(CallInfo {
dest: ExternalName::LibCall(LibCall::Probestack),
@@ -537,7 +542,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
let arg1 = Writable::from_reg(x_reg(11));
let arg2 = Writable::from_reg(x_reg(12));
let tmp = alloc_tmp(Self::word_type());
insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter());
insts.extend(Inst::load_constant_u64(tmp, size as u64, &mut alloc_tmp).into_iter());
insts.push(Inst::Call {
info: Box::new(CallInfo {
dest: ExternalName::LibCall(LibCall::Memcpy),

View File

@@ -140,7 +140,7 @@ impl MachInstEmitState<Inst> for EmitState {
impl Inst {
/// Construct the instruction sequence that computes "imm - rs".
pub(crate) fn construct_imm_sub_rs(rd: Writable<Reg>, imm: u64, rs: Reg) -> SmallInstVec<Inst> {
let mut insts = Inst::load_constant_u64(rd, imm);
let mut insts = Inst::load_constant_u64(rd, imm, &mut |_| rd);
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Sub,
rd,
@@ -930,7 +930,7 @@ impl MachInstEmit for Inst {
.emit(&[], sink, emit_info, state);
} else {
let tmp = writable_spilltmp_reg();
let mut insts = Inst::load_constant_u64(tmp, amount as u64);
let mut insts = Inst::load_constant_u64(tmp, amount as u64, &mut |_| tmp);
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd: writable_stack_reg(),
@@ -1111,9 +1111,11 @@ impl MachInstEmit for Inst {
} => {
let index = allocs.next(index);
// load
Inst::load_constant_u32(writable_spilltmp_reg(), targets_len as u64)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::load_constant_u32(writable_spilltmp_reg(), targets_len as u64, &mut |_| {
writable_spilltmp_reg()
})
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::CondBr {
taken: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 3),
not_taken: BranchTarget::zero(),
@@ -1254,7 +1256,7 @@ impl MachInstEmit for Inst {
if let Some(offset) = Imm12::maybe_from_u64(offset as u64) {
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: rd,
rd,
rs: base,
imm12: offset,
}
@@ -1835,17 +1837,13 @@ impl MachInstEmit for Inst {
let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8);
let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8);
if in_type == F32 {
Inst::load_fp_constant32(
tmp,
f32_bits(f32_bounds.0),
writable_spilltmp_reg(),
)
Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| {
writable_spilltmp_reg()
})
} else {
Inst::load_fp_constant64(
tmp,
f64_bits(f64_bounds.0),
writable_spilltmp_reg(),
)
Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| {
writable_spilltmp_reg()
})
}
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
@@ -1859,17 +1857,13 @@ impl MachInstEmit for Inst {
}
.emit(&[], sink, emit_info, state);
if in_type == F32 {
Inst::load_fp_constant32(
tmp,
f32_bits(f32_bounds.1),
writable_spilltmp_reg(),
)
Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| {
writable_spilltmp_reg()
})
} else {
Inst::load_fp_constant64(
tmp,
f64_bits(f64_bounds.1),
writable_spilltmp_reg(),
)
Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| {
writable_spilltmp_reg()
})
}
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
@@ -2160,17 +2154,13 @@ impl MachInstEmit for Inst {
}
// Load the maximum value that still needs rounding.
if ty == F32 {
Inst::load_fp_constant32(
f_tmp,
max_value_need_round(ty) as u32,
writable_spilltmp_reg(),
)
Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| {
writable_spilltmp_reg()
})
} else {
Inst::load_fp_constant64(
f_tmp,
max_value_need_round(ty),
writable_spilltmp_reg(),
)
Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| {
writable_spilltmp_reg()
})
}
.into_iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
@@ -2843,10 +2833,14 @@ impl MachInstEmit for Inst {
tmp: guard_size_tmp,
} => {
let step = writable_spilltmp_reg();
Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
Inst::load_constant_u64(
step,
(guard_size as u64) * (probe_count as u64),
&mut |_| step,
)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::load_constant_u64(guard_size_tmp, guard_size as u64, &mut |_| guard_size_tmp)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));

View File

@@ -10,7 +10,6 @@ use crate::ir::types::{F32, F64, FFLAGS, I128, I16, I32, I64, I8, IFLAGS, R32, R
pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
use crate::isa::CallConv;
use crate::machinst::isle::WritableReg;
use crate::machinst::*;
use crate::{settings, CodegenError, CodegenResult};
@@ -194,43 +193,57 @@ impl Inst {
pub(crate) fn load_imm12(rd: Writable<Reg>, imm: Imm12) -> Inst {
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: rd,
rd,
rs: zero_reg(),
imm12: imm,
}
}
/// Immediates can be loaded using lui and addi instructions.
fn load_const_imm(rd: Writable<Reg>, value: u64) -> Option<SmallInstVec<Inst>> {
fn load_const_imm<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u64,
alloc_tmp: &mut F,
) -> Option<SmallInstVec<Inst>> {
Inst::generate_imm(value, |imm20, imm12| {
let mut insts = SmallVec::new();
imm20.map(|x| insts.push(Inst::Lui { rd, imm: x }));
imm12.map(|x| {
let imm20_is_none = imm20.is_none();
let rs = if imm20_is_none {
zero_reg()
} else {
rd.to_reg()
};
let rs = if let Some(imm) = imm20 {
let rd = if imm12.is_some() { alloc_tmp(I64) } else { rd };
insts.push(Inst::Lui { rd, imm });
rd.to_reg()
} else {
zero_reg()
};
if let Some(imm12) = imm12 {
insts.push(Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs,
imm12: x,
imm12,
})
});
}
insts
})
}
pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value);
pub(crate) fn load_constant_u32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u64,
alloc_tmp: &mut F,
) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value, alloc_tmp);
insts.unwrap_or(LoadConstant::U32(value as u32).load_constant(rd))
}
pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value);
pub fn load_constant_u64<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u64,
alloc_tmp: &mut F,
) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value, alloc_tmp);
insts.unwrap_or(LoadConstant::U64(value).load_constant(rd))
}
@@ -255,13 +268,18 @@ impl Inst {
}
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32(
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u32,
tmp: Writable<Reg>,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let mut insts = SmallVec::new();
insts.extend(Self::load_constant_u32(tmp, const_data as u64));
let tmp = alloc_tmp(I64);
insts.extend(Self::load_constant_u32(
tmp,
const_data as u64,
&mut alloc_tmp,
));
insts.push(Inst::FpuRR {
frm: None,
alu_op: FpuOPRR::move_x_to_f_op(F32),
@@ -272,13 +290,14 @@ impl Inst {
}
/// Create instructions that load a 64-bit floating-point constant.
pub fn load_fp_constant64(
pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u64,
tmp: WritableReg,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let mut insts = SmallInstVec::new();
insts.extend(Self::load_constant_u64(tmp, const_data));
let tmp = alloc_tmp(I64);
insts.extend(Self::load_constant_u64(tmp, const_data, &mut alloc_tmp));
insts.push(Inst::FpuRR {
frm: None,
alu_op: FpuOPRR::move_x_to_f_op(F64),
@@ -699,22 +718,31 @@ impl MachInst for Inst {
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
if (ty.bits() <= 64 && ty.is_int()) || ty == R32 || ty == R64 {
return Inst::load_constant_u64(to_regs.only_reg().unwrap(), value as u64);
return Inst::load_constant_u64(
to_regs.only_reg().unwrap(),
value as u64,
&mut alloc_tmp,
);
};
match ty {
F32 => {
Inst::load_fp_constant32(to_regs.only_reg().unwrap(), value as u32, alloc_tmp(I64))
Inst::load_fp_constant32(to_regs.only_reg().unwrap(), value as u32, &mut alloc_tmp)
}
F64 => {
Inst::load_fp_constant64(to_regs.only_reg().unwrap(), value as u64, alloc_tmp(I64))
Inst::load_fp_constant64(to_regs.only_reg().unwrap(), value as u64, &mut alloc_tmp)
}
I128 => {
let mut insts = SmallInstVec::new();
insts.extend(Inst::load_constant_u64(
to_regs.regs()[0],
(value >> 64) as u64,
&mut alloc_tmp,
));
insts.extend(Inst::load_constant_u64(
to_regs.regs()[1],
value as u64,
&mut alloc_tmp,
));
insts.extend(Inst::load_constant_u64(to_regs.regs()[1], value as u64));
return insts;
}
_ => unreachable!("vector type not implemented now."),

View File

@@ -198,7 +198,8 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
fn imm(&mut self, ty: Type, val: u64) -> Reg {
let tmp = self.temp_writable_reg(ty);
self.emit_list(&MInst::load_constant_u64(tmp, val));
let insts = &MInst::load_constant_u64(tmp, val, &mut |ty| self.temp_writable_reg(ty));
self.emit_list(insts);
tmp.to_reg()
}
#[inline]
@@ -249,7 +250,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
}
fn load_u64_constant(&mut self, val: u64) -> Reg {
let rd = self.temp_writable_reg(I64);
MInst::load_constant_u64(rd, val)
MInst::load_constant_u64(rd, val, &mut |ty| self.temp_writable_reg(ty))
.iter()
.for_each(|i| self.emit(i));
rd.to_reg()
@@ -439,7 +440,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
self.emit(&MInst::BrTableCheck {
index,
targets_len: targets.len() as i32,
default_: default_,
default_,
});
self.emit(&MInst::BrTable {
index,

View File

@@ -244,13 +244,13 @@ mod test {
// on it to update:
// > riscv64-linux-gnu-objdump -b binary -D <file> -m riscv
//
// 0: 000013b7 lui t2,0x1
// 4: 23438393 addi t2,t2,564 # 0x1234
// 8: 0075053b .4byte 0x75053b
// 0: 000015b7 lui a1,0x1
// 4: 23458593 addi a1,a1,564 # 0x1234
// 8: 00b5053b .4byte 0xb5053b
// c: 00008067 ret
let golden = vec![
183, 19, 0, 0, 147, 131, 67, 35, 59, 5, 117, 0, 103, 128, 0, 0,
183, 21, 0, 0, 147, 133, 69, 35, 59, 5, 181, 0, 103, 128, 0, 0,
];
assert_eq!(code, &golden[..]);
}

View File

@@ -93,12 +93,12 @@ block0(v0: i64, v1: i64, v2: i64):
}
; block0:
; add a5,a0,a1
; add a5,a5,a2
; lui a4,1
; addi a4,a4,4
; add a7,a5,a4
; lw a0,0(a7)
; add a6,a0,a1
; add a6,a6,a2
; lui a5,1
; addi a5,a5,4
; add t3,a6,a5
; lw a0,0(t3)
; ret
function %f10() -> i32 {
@@ -151,8 +151,8 @@ block0(v0: i64):
; block0:
; lui a1,244141
; addi a1,a1,2560
; add a3,a0,a1
; lw a0,0(a3)
; add a4,a0,a1
; lw a0,0(a4)
; ret
function %f14(i32) -> i32 {
@@ -192,10 +192,10 @@ block0(v0: i64, v1: i64, v2: i64):
}
; block0:
; lui a2,1048575
; addi a2,a2,4094
; uext.w a5,a2
; lh a0,0(a5)
; lui a3,1048575
; addi a3,a3,4094
; uext.w a6,a3
; lh a0,0(a6)
; ret
function %f19(i64, i64, i64) -> i32 {
@@ -207,10 +207,10 @@ block0(v0: i64, v1: i64, v2: i64):
}
; block0:
; lui a2,1
; addi a2,a2,2
; uext.w a5,a2
; lh a0,0(a5)
; lui a3,1
; addi a3,a3,2
; uext.w a6,a3
; lh a0,0(a6)
; ret
function %f20(i64, i64, i64) -> i32 {
@@ -222,10 +222,10 @@ block0(v0: i64, v1: i64, v2: i64):
}
; block0:
; lui a2,1048575
; addi a2,a2,4094
; sext.w a5,a2
; lh a0,0(a5)
; lui a3,1048575
; addi a3,a3,4094
; sext.w a6,a3
; lh a0,0(a6)
; ret
function %f21(i64, i64, i64) -> i32 {
@@ -237,10 +237,10 @@ block0(v0: i64, v1: i64, v2: i64):
}
; block0:
; lui a2,1
; addi a2,a2,2
; sext.w a5,a2
; lh a0,0(a5)
; lui a3,1
; addi a3,a3,2
; sext.w a6,a3
; lh a0,0(a6)
; ret
function %i128(i64) -> i128 {

View File

@@ -33,9 +33,9 @@ block0(v0: i64):
}
; block0:
; lui t2,3
; addi t2,t2,57
; atomic_store.i64 t2,(a0)
; lui a1,3
; addi a1,a1,57
; atomic_store.i64 a1,(a0)
; ret
function %atomic_store_i32(i32, i64) {
@@ -69,8 +69,8 @@ block0(v0: i64):
}
; block0:
; lui t2,3
; addi t2,t2,57
; atomic_store.i32 t2,(a0)
; lui a1,3
; addi a1,a1,57
; atomic_store.i32 a1,(a0)
; ret

View File

@@ -452,10 +452,10 @@ block1:
}
; block0:
; lui t2,16
; addi t2,t2,4095
; and a2,a0,t2
; beq a2,zero,taken(label1),not_taken(label2)
; lui a1,16
; addi a1,a1,4095
; and a3,a0,a1
; beq a3,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
@@ -474,10 +474,10 @@ block1:
}
; block0:
; lui t2,16
; addi t2,t2,4095
; and a2,a0,t2
; bne a2,zero,taken(label1),not_taken(label2)
; lui a1,16
; addi a1,a1,4095
; and a3,a0,a1
; bne a3,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:

View File

@@ -39,8 +39,8 @@ block0:
}
; block0:
; lui a0,16
; addi a0,a0,4095
; lui t1,16
; addi a0,t1,4095
; ret
function %f() -> i64 {

View File

@@ -74,13 +74,13 @@ block0(v0: i64, v1: i32):
}
; block0:
; uext.w a6,a1
; add a7,a0,a6
; addi a7,a7,16
; lui a4,16
; addi a4,a4,4072
; ugt t3,a6,a4##ty=i64
; li t4,0
; selectif_spectre_guard a0,t4,a7##test=t3
; uext.w a7,a1
; add t3,a0,a7
; addi t3,t3,16
; lui a5,16
; addi a5,a5,4072
; ugt t4,a7,a5##ty=i64
; li t0,0
; selectif_spectre_guard a0,t0,t3##test=t4
; ret

View File

@@ -95,11 +95,11 @@ block0(v0: i16):
}
; block0:
; lui t2,16
; addi t2,t2,4095
; and a2,a0,t2
; li a4,-1
; select_reg a1,zero,a4##condition=(zero eq a2)
; lui a1,16
; addi a1,a1,4095
; and a3,a0,a1
; li a5,-1
; select_reg a1,zero,a5##condition=(zero eq a3)
; mv a0,a1
; ret

View File

@@ -11,12 +11,12 @@ block0:
}
; block0:
; lui t0,14
; addi t0,t0,3532
; lui a0,14
; addi a0,a0,3532
; uext.h a3,t0
; uext.h a5,a0
; ne a0,a3,a5##ty=i16
; lui t1,14
; addi t1,t1,3532
; lui a2,14
; addi a2,a2,3532
; uext.h a5,t1
; uext.h a7,a2
; ne a0,a5,a7##ty=i16
; ret