riscv64: Only emit jumps at the end of basic blocks (#5381)

This PR fixes two bugs in the riscv64 backend, where branch instructions were emitted in the middle of a basic block:

- Constant emission, where the constants are inlined into the vcode and are jumped over at runtime.
- The BrTableCheck pseudo-instruction, which was always emitted before a BrTable instruction, and would handle jumping to the default label.
The first bug was resolved by introducing two new pseudo-instructions, LoadConst32 and LoadConst64. Both of these instructions serve to delay the original encoding to emission time, after regalloc2 has run.

The second bug was fixed by removing the BrTableCheck instruction. As it was always emitted directly before BrTable, it was easier to remove it and merge the two into a single instruction.
This commit is contained in:
Trevor Elliott
2022-12-06 10:54:10 -08:00
committed by GitHub
parent feaa7ca75f
commit 293bb5b334
5 changed files with 88 additions and 115 deletions

View File

@@ -10,6 +10,14 @@
(rd WritableReg) (rd WritableReg)
(imm Imm20)) (imm Imm20))
(LoadConst32
(rd WritableReg)
(imm u32))
(LoadConst64
(rd WritableReg)
(imm u64))
(Auipc (Auipc
(rd WritableReg) (rd WritableReg)
(imm Imm20)) (imm Imm20))
@@ -210,10 +218,6 @@
(op ReferenceCheckOP) (op ReferenceCheckOP)
(x Reg)) (x Reg))
(BrTableCheck
(index Reg)
(targets_len i32)
(default_ BranchTarget))
(BrTable (BrTable
(index Reg) (index Reg)
(tmp1 WritableReg) (tmp1 WritableReg)

View File

@@ -635,6 +635,20 @@ impl MachInstEmit for Inst {
let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12);
sink.put4(x); sink.put4(x);
} }
&Inst::LoadConst32 { rd, imm } => {
let rd = allocs.next_writable(rd);
LoadConstant::U32(imm)
.load_constant(rd, &mut |_| rd)
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
}
&Inst::LoadConst64 { rd, imm } => {
let rd = allocs.next_writable(rd);
LoadConstant::U64(imm)
.load_constant(rd, &mut |_| rd)
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
}
&Inst::FpuRR { &Inst::FpuRR {
frm, frm,
alu_op, alu_op,
@@ -1109,14 +1123,15 @@ impl MachInstEmit for Inst {
} }
} }
} }
&Inst::BrTableCheck { &Inst::BrTable {
index, index,
targets_len, tmp1,
default_, ref targets,
} => { } => {
let index = allocs.next(index); let index = allocs.next(index);
// load let tmp1 = allocs.next_writable(tmp1);
Inst::load_constant_u32(writable_spilltmp_reg(), targets_len as u64, &mut |_| {
Inst::load_constant_u32(writable_spilltmp_reg(), targets.len() as u64, &mut |_| {
writable_spilltmp_reg() writable_spilltmp_reg()
}) })
.iter() .iter()
@@ -1133,20 +1148,13 @@ impl MachInstEmit for Inst {
.emit(&[], sink, emit_info, state); .emit(&[], sink, emit_info, state);
sink.use_label_at_offset( sink.use_label_at_offset(
sink.cur_offset(), sink.cur_offset(),
default_.as_label().unwrap(), targets[0].as_label().unwrap(),
LabelUse::PCRel32, LabelUse::PCRel32,
); );
Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
.iter() .iter()
.for_each(|i| i.emit(&[], sink, emit_info, state)); .for_each(|i| i.emit(&[], sink, emit_info, state));
}
&Inst::BrTable {
index,
tmp1,
ref targets,
} => {
let index = allocs.next(index);
let tmp1 = allocs.next_writable(tmp1);
let mut insts = SmallInstVec::new(); let mut insts = SmallInstVec::new();
// get current pc. // get current pc.
insts.push(Inst::Auipc { insts.push(Inst::Auipc {
@@ -1175,7 +1183,7 @@ impl MachInstEmit for Inst {
// here is all the jumps. // here is all the jumps.
let mut need_label_use = vec![]; let mut need_label_use = vec![];
for t in targets { for t in targets.iter().skip(1) {
need_label_use.push((insts.len(), t.clone())); need_label_use.push((insts.len(), t.clone()));
insts.extend(Inst::construct_auipc_and_jalr( insts.extend(Inst::construct_auipc_and_jalr(
None, None,

View File

@@ -17,7 +17,7 @@ pub use crate::ir::condcodes::FloatCC;
use alloc::vec::Vec; use alloc::vec::Vec;
use regalloc2::{PRegSet, VReg}; use regalloc2::{PRegSet, VReg};
use smallvec::SmallVec; use smallvec::{smallvec, SmallVec};
use std::boxed::Box; use std::boxed::Box;
use std::string::{String, ToString}; use std::string::{String, ToString};
@@ -235,7 +235,12 @@ impl Inst {
alloc_tmp: &mut F, alloc_tmp: &mut F,
) -> SmallInstVec<Inst> { ) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value, alloc_tmp); let insts = Inst::load_const_imm(rd, value, alloc_tmp);
insts.unwrap_or(LoadConstant::U32(value as u32).load_constant(rd, alloc_tmp)) insts.unwrap_or_else(|| {
smallvec![Inst::LoadConst32 {
rd,
imm: value as u32
}]
})
} }
pub fn load_constant_u64<F: FnMut(Type) -> Writable<Reg>>( pub fn load_constant_u64<F: FnMut(Type) -> Writable<Reg>>(
@@ -244,7 +249,7 @@ impl Inst {
alloc_tmp: &mut F, alloc_tmp: &mut F,
) -> SmallInstVec<Inst> { ) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value, alloc_tmp); let insts = Inst::load_const_imm(rd, value, alloc_tmp);
insts.unwrap_or(LoadConstant::U64(value).load_constant(rd, alloc_tmp)) insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }])
} }
pub(crate) fn construct_auipc_and_jalr( pub(crate) fn construct_auipc_and_jalr(
@@ -337,11 +342,10 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(index); collector.reg_use(index);
collector.reg_early_def(tmp1); collector.reg_early_def(tmp1);
} }
&Inst::BrTableCheck { index, .. } => {
collector.reg_use(index);
}
&Inst::Auipc { rd, .. } => collector.reg_def(rd), &Inst::Auipc { rd, .. } => collector.reg_def(rd),
&Inst::Lui { rd, .. } => collector.reg_def(rd), &Inst::Lui { rd, .. } => collector.reg_def(rd),
&Inst::LoadConst32 { rd, .. } => collector.reg_def(rd),
&Inst::LoadConst64 { rd, .. } => collector.reg_def(rd),
&Inst::AluRRR { rd, rs1, rs2, .. } => { &Inst::AluRRR { rd, rs1, rs2, .. } => {
collector.reg_use(rs1); collector.reg_use(rs1);
collector.reg_use(rs2); collector.reg_use(rs2);
@@ -695,9 +699,7 @@ impl MachInst for Inst {
&Inst::CondBr { .. } => MachTerminator::Cond, &Inst::CondBr { .. } => MachTerminator::Cond,
&Inst::Jalr { .. } => MachTerminator::Uncond, &Inst::Jalr { .. } => MachTerminator::Uncond,
&Inst::Ret { .. } => MachTerminator::Ret, &Inst::Ret { .. } => MachTerminator::Ret,
// BrTableCheck is a check before BrTable &Inst::BrTable { .. } => MachTerminator::Indirect,
// can lead transfer to default_.
&Inst::BrTable { .. } | &Inst::BrTableCheck { .. } => MachTerminator::Indirect,
_ => MachTerminator::None, _ => MachTerminator::None,
} }
} }
@@ -1202,17 +1204,6 @@ impl Inst {
let dst = format_regs(&dst[..], allocs); let dst = format_regs(&dst[..], allocs);
format!("{} {},{},{}##ty={}", op.op_name(), dst, x, y, ty,) format!("{} {},{},{}##ty={}", op.op_name(), dst, x, y, ty,)
} }
&Inst::BrTableCheck {
index,
targets_len,
default_,
} => {
let index = format_reg(index, allocs);
format!(
"br_table_check {}##targets_len={} default_={}",
index, targets_len, default_
)
}
&Inst::BrTable { &Inst::BrTable {
index, index,
tmp1, tmp1,
@@ -1249,7 +1240,28 @@ impl Inst {
&Inst::Lui { rd, ref imm } => { &Inst::Lui { rd, ref imm } => {
format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits) format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits)
} }
&Inst::LoadConst32 { rd, imm } => {
use std::fmt::Write;
let rd = format_reg(rd.to_reg(), allocs);
let mut buf = String::new();
write!(&mut buf, "auipc {},0; ", rd).unwrap();
write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap();
write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap();
write!(&mut buf, ".4byte 0x{:x}", imm).unwrap();
buf
}
&Inst::LoadConst64 { rd, imm } => {
use std::fmt::Write;
let rd = format_reg(rd.to_reg(), allocs);
let mut buf = String::new();
write!(&mut buf, "auipc {},0; ", rd).unwrap();
write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap();
write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 8).unwrap();
write!(&mut buf, ".8byte 0x{:x}", imm).unwrap();
buf
}
&Inst::AluRRR { &Inst::AluRRR {
alu_op, alu_op,
rd, rd,

View File

@@ -430,21 +430,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
} }
} }
fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> InstOutput { fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> InstOutput {
let tmp = self.temp_writable_reg(I64); let tmp1 = self.temp_writable_reg(I64);
let default_ = BranchTarget::Label(targets[0]);
let targets: Vec<BranchTarget> = targets let targets: Vec<BranchTarget> = targets
.iter() .into_iter()
.skip(1) .copied()
.map(|bix| BranchTarget::Label(*bix)) .map(BranchTarget::Label)
.collect(); .collect();
self.emit(&MInst::BrTableCheck {
index,
targets_len: targets.len() as i32,
default_,
});
self.emit(&MInst::BrTable { self.emit(&MInst::BrTable {
index, index,
tmp1: tmp, tmp1,
targets, targets,
}); });
InstOutput::default() InstOutput::default()

View File

@@ -50,10 +50,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff0000
; ld a0,12(t1)
; j 12
; .8byte 0xffff0000
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -63,10 +60,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff00000000
; ld a0,12(t1)
; j 12
; .8byte 0xffff00000000
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -76,10 +70,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff000000000000
; ld a0,12(t1)
; j 12
; .8byte 0xffff000000000000
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -109,10 +100,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffffffff0000ffff
; ld a0,12(t1)
; j 12
; .8byte 0xffffffff0000ffff
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -122,10 +110,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff0000ffffffff
; ld a0,12(t1)
; j 12
; .8byte 0xffff0000ffffffff
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -135,10 +120,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffffffffffff
; ld a0,12(t1)
; j 12
; .8byte 0xffffffffffff
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -148,10 +130,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xf34bf0a31212003a
; ld a0,12(t1)
; j 12
; .8byte 0xf34bf0a31212003a
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -161,10 +140,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0x12e900001ef40000
; ld a0,12(t1)
; j 12
; .8byte 0x12e900001ef40000
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -174,10 +150,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0x12e9ffff1ef4ffff
; ld a0,12(t1)
; j 12
; .8byte 0x12e9ffff1ef4ffff
; ret ; ret
function %f() -> i32 { function %f() -> i32 {
@@ -197,10 +170,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xfffffff7
; ld a0,12(t1)
; j 12
; .8byte 0xfffffff7
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -210,10 +180,7 @@ block0:
} }
; block0: ; block0:
; auipc t1,0 ; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xfffffff7
; ld a0,12(t1)
; j 12
; .8byte 0xfffffff7
; ret ; ret
function %f() -> i64 { function %f() -> i64 {
@@ -233,11 +200,8 @@ block0:
} }
; block0: ; block0:
; auipc t2,0 ; auipc t1,0; ld t1,12(t1); j 12; .8byte 0x3ff0000000000000
; ld t2,12(t2) ; fmv.d.x fa0,t1
; j 12
; .8byte 0x3ff0000000000000
; fmv.d.x fa0,t2
; ret ; ret
function %f() -> f32 { function %f() -> f32 {
@@ -258,11 +222,8 @@ block0:
} }
; block0: ; block0:
; auipc t2,0 ; auipc t1,0; ld t1,12(t1); j 12; .8byte 0x4049000000000000
; ld t2,12(t2) ; fmv.d.x fa0,t1
; j 12
; .8byte 0x4049000000000000
; fmv.d.x fa0,t2
; ret ; ret
function %f() -> f32 { function %f() -> f32 {
@@ -305,11 +266,8 @@ block0:
} }
; block0: ; block0:
; auipc t2,0 ; auipc t1,0; ld t1,12(t1); j 12; .8byte 0xc030000000000000
; ld t2,12(t2) ; fmv.d.x fa0,t1
; j 12
; .8byte 0xc030000000000000
; fmv.d.x fa0,t2
; ret ; ret
function %f() -> f32 { function %f() -> f32 {
@@ -319,10 +277,7 @@ block0:
} }
; block0: ; block0:
; auipc t2,0 ; auipc t1,0; ld t1,12(t1); j 8; .4byte 0xc1800000
; lwu t2,12(t2) ; fmv.w.x fa0,t1
; j 8
; .4byte 0xc1800000
; fmv.w.x fa0,t2
; ret ; ret