riscv64: Only emit jumps at the end of basic blocks (#5381)
This PR fixes two bugs in the riscv64 backend, where branch instructions were emitted in the middle of a basic block: Constant emission, where the constants are inlined into the vcode and are jumped over at runtime, The BrTableCheck pseudo-instruction, which was always emitted before a BrTable instruction, and would handle jumping to the default label. The first bug was resolved by introducing two new psuedo instructions, LoadConst32 and LoadConst64. Both of these instructions serve to delay the original encoding to emission time, after regalloc2 has run. The second bug was fixed by removing the BrTableCheck instruction. As it was always emitted directly before BrTable, it was easier to remove it and merge the two into a single instruction.
This commit is contained in:
@@ -10,6 +10,14 @@
|
||||
(rd WritableReg)
|
||||
(imm Imm20))
|
||||
|
||||
(LoadConst32
|
||||
(rd WritableReg)
|
||||
(imm u32))
|
||||
|
||||
(LoadConst64
|
||||
(rd WritableReg)
|
||||
(imm u64))
|
||||
|
||||
(Auipc
|
||||
(rd WritableReg)
|
||||
(imm Imm20))
|
||||
@@ -209,11 +217,7 @@
|
||||
(rd WritableReg)
|
||||
(op ReferenceCheckOP)
|
||||
(x Reg))
|
||||
|
||||
(BrTableCheck
|
||||
(index Reg)
|
||||
(targets_len i32)
|
||||
(default_ BranchTarget))
|
||||
|
||||
(BrTable
|
||||
(index Reg)
|
||||
(tmp1 WritableReg)
|
||||
|
||||
@@ -635,6 +635,20 @@ impl MachInstEmit for Inst {
|
||||
let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12);
|
||||
sink.put4(x);
|
||||
}
|
||||
&Inst::LoadConst32 { rd, imm } => {
|
||||
let rd = allocs.next_writable(rd);
|
||||
LoadConstant::U32(imm)
|
||||
.load_constant(rd, &mut |_| rd)
|
||||
.into_iter()
|
||||
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
|
||||
}
|
||||
&Inst::LoadConst64 { rd, imm } => {
|
||||
let rd = allocs.next_writable(rd);
|
||||
LoadConstant::U64(imm)
|
||||
.load_constant(rd, &mut |_| rd)
|
||||
.into_iter()
|
||||
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
|
||||
}
|
||||
&Inst::FpuRR {
|
||||
frm,
|
||||
alu_op,
|
||||
@@ -1109,14 +1123,15 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
}
|
||||
}
|
||||
&Inst::BrTableCheck {
|
||||
&Inst::BrTable {
|
||||
index,
|
||||
targets_len,
|
||||
default_,
|
||||
tmp1,
|
||||
ref targets,
|
||||
} => {
|
||||
let index = allocs.next(index);
|
||||
// load
|
||||
Inst::load_constant_u32(writable_spilltmp_reg(), targets_len as u64, &mut |_| {
|
||||
let tmp1 = allocs.next_writable(tmp1);
|
||||
|
||||
Inst::load_constant_u32(writable_spilltmp_reg(), targets.len() as u64, &mut |_| {
|
||||
writable_spilltmp_reg()
|
||||
})
|
||||
.iter()
|
||||
@@ -1133,20 +1148,13 @@ impl MachInstEmit for Inst {
|
||||
.emit(&[], sink, emit_info, state);
|
||||
sink.use_label_at_offset(
|
||||
sink.cur_offset(),
|
||||
default_.as_label().unwrap(),
|
||||
targets[0].as_label().unwrap(),
|
||||
LabelUse::PCRel32,
|
||||
);
|
||||
Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
|
||||
.iter()
|
||||
.for_each(|i| i.emit(&[], sink, emit_info, state));
|
||||
}
|
||||
&Inst::BrTable {
|
||||
index,
|
||||
tmp1,
|
||||
ref targets,
|
||||
} => {
|
||||
let index = allocs.next(index);
|
||||
let tmp1 = allocs.next_writable(tmp1);
|
||||
|
||||
let mut insts = SmallInstVec::new();
|
||||
// get current pc.
|
||||
insts.push(Inst::Auipc {
|
||||
@@ -1175,7 +1183,7 @@ impl MachInstEmit for Inst {
|
||||
|
||||
// here is all the jumps.
|
||||
let mut need_label_use = vec![];
|
||||
for t in targets {
|
||||
for t in targets.iter().skip(1) {
|
||||
need_label_use.push((insts.len(), t.clone()));
|
||||
insts.extend(Inst::construct_auipc_and_jalr(
|
||||
None,
|
||||
|
||||
@@ -17,7 +17,7 @@ pub use crate::ir::condcodes::FloatCC;
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use regalloc2::{PRegSet, VReg};
|
||||
use smallvec::SmallVec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::boxed::Box;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
@@ -235,7 +235,12 @@ impl Inst {
|
||||
alloc_tmp: &mut F,
|
||||
) -> SmallInstVec<Inst> {
|
||||
let insts = Inst::load_const_imm(rd, value, alloc_tmp);
|
||||
insts.unwrap_or(LoadConstant::U32(value as u32).load_constant(rd, alloc_tmp))
|
||||
insts.unwrap_or_else(|| {
|
||||
smallvec![Inst::LoadConst32 {
|
||||
rd,
|
||||
imm: value as u32
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load_constant_u64<F: FnMut(Type) -> Writable<Reg>>(
|
||||
@@ -244,7 +249,7 @@ impl Inst {
|
||||
alloc_tmp: &mut F,
|
||||
) -> SmallInstVec<Inst> {
|
||||
let insts = Inst::load_const_imm(rd, value, alloc_tmp);
|
||||
insts.unwrap_or(LoadConstant::U64(value).load_constant(rd, alloc_tmp))
|
||||
insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }])
|
||||
}
|
||||
|
||||
pub(crate) fn construct_auipc_and_jalr(
|
||||
@@ -337,11 +342,10 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
||||
collector.reg_use(index);
|
||||
collector.reg_early_def(tmp1);
|
||||
}
|
||||
&Inst::BrTableCheck { index, .. } => {
|
||||
collector.reg_use(index);
|
||||
}
|
||||
&Inst::Auipc { rd, .. } => collector.reg_def(rd),
|
||||
&Inst::Lui { rd, .. } => collector.reg_def(rd),
|
||||
&Inst::LoadConst32 { rd, .. } => collector.reg_def(rd),
|
||||
&Inst::LoadConst64 { rd, .. } => collector.reg_def(rd),
|
||||
&Inst::AluRRR { rd, rs1, rs2, .. } => {
|
||||
collector.reg_use(rs1);
|
||||
collector.reg_use(rs2);
|
||||
@@ -695,9 +699,7 @@ impl MachInst for Inst {
|
||||
&Inst::CondBr { .. } => MachTerminator::Cond,
|
||||
&Inst::Jalr { .. } => MachTerminator::Uncond,
|
||||
&Inst::Ret { .. } => MachTerminator::Ret,
|
||||
// BrTableCheck is a check before BrTable
|
||||
// can lead transfer to default_.
|
||||
&Inst::BrTable { .. } | &Inst::BrTableCheck { .. } => MachTerminator::Indirect,
|
||||
&Inst::BrTable { .. } => MachTerminator::Indirect,
|
||||
_ => MachTerminator::None,
|
||||
}
|
||||
}
|
||||
@@ -1202,17 +1204,6 @@ impl Inst {
|
||||
let dst = format_regs(&dst[..], allocs);
|
||||
format!("{} {},{},{}##ty={}", op.op_name(), dst, x, y, ty,)
|
||||
}
|
||||
&Inst::BrTableCheck {
|
||||
index,
|
||||
targets_len,
|
||||
default_,
|
||||
} => {
|
||||
let index = format_reg(index, allocs);
|
||||
format!(
|
||||
"br_table_check {}##targets_len={} default_={}",
|
||||
index, targets_len, default_
|
||||
)
|
||||
}
|
||||
&Inst::BrTable {
|
||||
index,
|
||||
tmp1,
|
||||
@@ -1249,7 +1240,28 @@ impl Inst {
|
||||
&Inst::Lui { rd, ref imm } => {
|
||||
format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits)
|
||||
}
|
||||
&Inst::LoadConst32 { rd, imm } => {
|
||||
use std::fmt::Write;
|
||||
|
||||
let rd = format_reg(rd.to_reg(), allocs);
|
||||
let mut buf = String::new();
|
||||
write!(&mut buf, "auipc {},0; ", rd).unwrap();
|
||||
write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap();
|
||||
write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap();
|
||||
write!(&mut buf, ".4byte 0x{:x}", imm).unwrap();
|
||||
buf
|
||||
}
|
||||
&Inst::LoadConst64 { rd, imm } => {
|
||||
use std::fmt::Write;
|
||||
|
||||
let rd = format_reg(rd.to_reg(), allocs);
|
||||
let mut buf = String::new();
|
||||
write!(&mut buf, "auipc {},0; ", rd).unwrap();
|
||||
write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap();
|
||||
write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 8).unwrap();
|
||||
write!(&mut buf, ".8byte 0x{:x}", imm).unwrap();
|
||||
buf
|
||||
}
|
||||
&Inst::AluRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
|
||||
@@ -430,21 +430,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
|
||||
}
|
||||
}
|
||||
fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> InstOutput {
|
||||
let tmp = self.temp_writable_reg(I64);
|
||||
let default_ = BranchTarget::Label(targets[0]);
|
||||
let tmp1 = self.temp_writable_reg(I64);
|
||||
let targets: Vec<BranchTarget> = targets
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(|bix| BranchTarget::Label(*bix))
|
||||
.into_iter()
|
||||
.copied()
|
||||
.map(BranchTarget::Label)
|
||||
.collect();
|
||||
self.emit(&MInst::BrTableCheck {
|
||||
index,
|
||||
targets_len: targets.len() as i32,
|
||||
default_,
|
||||
});
|
||||
self.emit(&MInst::BrTable {
|
||||
index,
|
||||
tmp1: tmp,
|
||||
tmp1,
|
||||
targets,
|
||||
});
|
||||
InstOutput::default()
|
||||
|
||||
@@ -50,10 +50,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xffff0000
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff0000
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -63,10 +60,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xffff00000000
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff00000000
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -76,10 +70,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xffff000000000000
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff000000000000
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -109,10 +100,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xffffffff0000ffff
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffffffff0000ffff
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -122,10 +110,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xffff0000ffffffff
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffff0000ffffffff
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -135,10 +120,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xffffffffffff
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xffffffffffff
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -148,10 +130,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xf34bf0a31212003a
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xf34bf0a31212003a
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -161,10 +140,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0x12e900001ef40000
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0x12e900001ef40000
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -174,10 +150,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0x12e9ffff1ef4ffff
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0x12e9ffff1ef4ffff
|
||||
; ret
|
||||
|
||||
function %f() -> i32 {
|
||||
@@ -197,10 +170,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xfffffff7
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xfffffff7
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -210,10 +180,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t1,0
|
||||
; ld a0,12(t1)
|
||||
; j 12
|
||||
; .8byte 0xfffffff7
|
||||
; auipc a0,0; ld a0,12(a0); j 12; .8byte 0xfffffff7
|
||||
; ret
|
||||
|
||||
function %f() -> i64 {
|
||||
@@ -233,11 +200,8 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t2,0
|
||||
; ld t2,12(t2)
|
||||
; j 12
|
||||
; .8byte 0x3ff0000000000000
|
||||
; fmv.d.x fa0,t2
|
||||
; auipc t1,0; ld t1,12(t1); j 12; .8byte 0x3ff0000000000000
|
||||
; fmv.d.x fa0,t1
|
||||
; ret
|
||||
|
||||
function %f() -> f32 {
|
||||
@@ -258,11 +222,8 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t2,0
|
||||
; ld t2,12(t2)
|
||||
; j 12
|
||||
; .8byte 0x4049000000000000
|
||||
; fmv.d.x fa0,t2
|
||||
; auipc t1,0; ld t1,12(t1); j 12; .8byte 0x4049000000000000
|
||||
; fmv.d.x fa0,t1
|
||||
; ret
|
||||
|
||||
function %f() -> f32 {
|
||||
@@ -305,11 +266,8 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t2,0
|
||||
; ld t2,12(t2)
|
||||
; j 12
|
||||
; .8byte 0xc030000000000000
|
||||
; fmv.d.x fa0,t2
|
||||
; auipc t1,0; ld t1,12(t1); j 12; .8byte 0xc030000000000000
|
||||
; fmv.d.x fa0,t1
|
||||
; ret
|
||||
|
||||
function %f() -> f32 {
|
||||
@@ -319,10 +277,7 @@ block0:
|
||||
}
|
||||
|
||||
; block0:
|
||||
; auipc t2,0
|
||||
; lwu t2,12(t2)
|
||||
; j 8
|
||||
; .4byte 0xc1800000
|
||||
; fmv.w.x fa0,t2
|
||||
; auipc t1,0; ld t1,12(t1); j 8; .4byte 0xc1800000
|
||||
; fmv.w.x fa0,t1
|
||||
; ret
|
||||
|
||||
|
||||
Reference in New Issue
Block a user