Merge pull request #3769 from cfallin/fix-debuginfo-cold-blocks

Cranelift: fix debuginfo wrt cold blocks and non-monotonic layout.
Nick Fitzgerald, 2022-02-07 08:58:50 -08:00 (committed by GitHub)
3 changed files with 120 additions and 26 deletions
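The problem being fixed: once cold blocks are emitted out of line after all hot code, the VCode instruction index no longer maps monotonically onto machine-code offsets, and the value-label range analysis assumed that it did. A minimal standalone sketch of the symptom (invented offsets, not taken from this diff):

// Illustration only: with one cold block, instruction indices no longer map
// monotonically onto machine-code offsets.
fn offsets_are_monotonic(inst_end_offsets: &[u32]) -> bool {
    inst_end_offsets.windows(2).all(|w| w[0] <= w[1])
}

fn main() {
    // VCode indices 0..5; index 2 lives in a cold block that is emitted last,
    // so its end offset (24) is larger than those of later-indexed hot insts.
    let inst_end_offsets = [4u32, 8, 24, 12, 16];
    assert!(!offsets_are_monotonic(&inst_end_offsets));
}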


@@ -184,12 +184,13 @@ fn isa_constructor(
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{types::*, SourceLoc, ValueLabel, ValueLabelStart};
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
use crate::isa::CallConv;
use crate::settings;
use crate::settings::Configurable;
use core::str::FromStr;
use cranelift_entity::EntityRef;
use target_lexicon::Triple;
/// We have to test cold blocks by observing final machine code,
@@ -204,6 +205,9 @@ mod test {
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
// Add debug info: this tests the debug machinery wrt cold
// blocks as well.
func.dfg.collect_debug_info();
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
@@ -216,26 +220,73 @@ mod test {
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
pos.set_srcloc(SourceLoc::new(1));
let v0 = pos.ins().iconst(I32, 0x1234);
pos.set_srcloc(SourceLoc::new(2));
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().brnz(v1, bb1, &[v1]);
pos.ins().jump(bb2, &[]);
pos.insert_block(bb1);
pos.set_srcloc(SourceLoc::new(3));
let v2 = pos.ins().isub(v1, v0);
pos.set_srcloc(SourceLoc::new(4));
let v3 = pos.ins().iadd(v2, bb1_param);
pos.ins().brnz(v1, bb2, &[]);
pos.ins().jump(bb3, &[v3]);
pos.func.layout.set_cold(bb2);
pos.insert_block(bb2);
pos.set_srcloc(SourceLoc::new(5));
let v4 = pos.ins().iadd(v1, v0);
pos.ins().brnz(v4, bb2, &[]);
pos.ins().jump(bb1, &[v4]);
pos.insert_block(bb3);
pos.set_srcloc(SourceLoc::new(6));
pos.ins().return_(&[bb3_param]);
// Create some debug info. Make one label that follows all the
// values around. Note that this is usually done via an API on
// the FunctionBuilder, but that's in cranelift_frontend
// (i.e., a higher level of the crate DAG) so we have to build
// it manually here.
pos.func.dfg.values_labels.as_mut().unwrap().insert(
v0,
crate::ir::ValueLabelAssignments::Starts(vec![ValueLabelStart {
from: SourceLoc::new(1),
label: ValueLabel::new(1),
}]),
);
pos.func.dfg.values_labels.as_mut().unwrap().insert(
v1,
crate::ir::ValueLabelAssignments::Starts(vec![ValueLabelStart {
from: SourceLoc::new(2),
label: ValueLabel::new(1),
}]),
);
pos.func.dfg.values_labels.as_mut().unwrap().insert(
v2,
crate::ir::ValueLabelAssignments::Starts(vec![ValueLabelStart {
from: SourceLoc::new(3),
label: ValueLabel::new(1),
}]),
);
pos.func.dfg.values_labels.as_mut().unwrap().insert(
v3,
crate::ir::ValueLabelAssignments::Starts(vec![ValueLabelStart {
from: SourceLoc::new(4),
label: ValueLabel::new(1),
}]),
);
pos.func.dfg.values_labels.as_mut().unwrap().insert(
v4,
crate::ir::ValueLabelAssignments::Starts(vec![ValueLabelStart {
from: SourceLoc::new(5),
label: ValueLabel::new(1),
}]),
);
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
shared_flags_builder.set("enable_verifier", "true").unwrap();

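As background for the comment above about wiring up `values_labels` by hand: code that sits above cranelift_codegen would normally attach the same labels through cranelift_frontend's `FunctionBuilder`. A rough sketch under that assumption (frontend API names of this era; not part of this change):

use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir::{
    types::I32, AbiParam, ExternalName, Function, InstBuilder, Signature, ValueLabel,
};
use cranelift_codegen::isa::CallConv;
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};

fn labeled_function() -> Function {
    let mut sig = Signature::new(CallConv::SystemV);
    sig.params.push(AbiParam::new(I32));
    sig.returns.push(AbiParam::new(I32));
    let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
    // Value labels are only recorded once debug-info collection is enabled.
    func.dfg.collect_debug_info();
    let mut ctx = FunctionBuilderContext::new();
    let mut builder = FunctionBuilder::new(&mut func, &mut ctx);
    let block = builder.create_block();
    builder.append_block_params_for_function_params(block);
    builder.switch_to_block(block);
    let arg = builder.block_params(block)[0];
    let v = builder.ins().iadd_imm(arg, 1);
    // One call per value; the builder records its current source location.
    builder.set_val_label(v, ValueLabel::new(1));
    builder.ins().return_(&[v]);
    builder.seal_all_blocks();
    builder.finalize();
    func
}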

@@ -378,16 +378,26 @@ where
/// which that label is bound.
pub(crate) fn compute<I: VCodeInst>(
insts: &[I],
inst_ends: &[u32],
label_insn_index: &[u32],
layout_info: &InstsLayoutInfo,
) -> ValueLabelsRanges {
let inst_start = |idx: usize| if idx == 0 { 0 } else { inst_ends[idx - 1] };
let inst_start = |idx: usize| {
if idx == 0 {
0
} else {
layout_info.inst_end_offsets[idx - 1]
}
};
trace!("compute: insts =");
for i in 0..insts.len() {
trace!(" #{} end: {} -> {:?}", i, inst_ends[i], insts[i]);
trace!(
" #{} end: {} -> {:?}",
i,
layout_info.inst_end_offsets[i],
insts[i]
);
}
trace!("label_insn_index: {:?}", label_insn_index);
trace!("label_insn_index: {:?}", layout_info.label_inst_indices);
// Info at each block head, indexed by label.
let mut block_starts: HashMap<u32, AnalysisInfo> = HashMap::new();
@@ -409,7 +419,7 @@ pub(crate) fn compute<I: VCodeInst>(
trace!("at block {} -> state: {:?}", block, state);
// Iterate for each instruction in the block (we break at the first
// terminator we see).
let mut index = label_insn_index[block as usize];
let mut index = layout_info.label_inst_indices[block as usize];
while index < insts.len() as u32 {
state.step(&insts[index as usize]);
trace!(" -> inst #{}: {:?}", index, insts[index as usize]);
@@ -446,18 +456,33 @@ pub(crate) fn compute<I: VCodeInst>(
// value-label locations.
let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
for block in 0..label_insn_index.len() {
let start_index = label_insn_index[block];
let end_index = if block == label_insn_index.len() - 1 {
for block in 0..layout_info.label_inst_indices.len() {
let start_index = layout_info.label_inst_indices[block];
let end_index = if block == layout_info.label_inst_indices.len() - 1 {
insts.len() as u32
} else {
label_insn_index[block + 1]
layout_info.label_inst_indices[block + 1]
};
let block = block as u32;
let mut state = block_starts.get(&block).unwrap().clone();
for index in start_index..end_index {
let offset = inst_start(index as usize);
let end = inst_ends[index as usize];
let end = layout_info.inst_end_offsets[index as usize];
// Cold blocks cause instructions to occur out-of-order wrt
// others. We rely on the monotonic mapping from instruction
// index to offset in machine code for this analysis to work,
// so we just skip debuginfo for cold blocks. This should be
// generally fine, as cold blocks generally constitute
// slowpaths for expansions of particular ops, rather than
// user-written code.
if layout_info.start_of_cold_code.is_some()
&& offset >= layout_info.start_of_cold_code.unwrap()
{
continue;
}
assert!(offset <= end);
state.step(&insts[index as usize]);
for (label, locs) in &state.label_to_locs {

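The effect of the new early `continue` can be pictured in isolation: an instruction whose computed start offset lands at or past `start_of_cold_code` contributes nothing to the value-label ranges. A simplified standalone sketch of that filter (invented offsets, not the real `compute`):

fn debuginfo_eligible(inst_end_offsets: &[u32], start_of_cold_code: Option<u32>) -> Vec<usize> {
    // Same convention as above: an instruction starts where its predecessor ends.
    let inst_start = |idx: usize| if idx == 0 { 0 } else { inst_end_offsets[idx - 1] };
    (0..inst_end_offsets.len())
        .filter(|&idx| start_of_cold_code.map_or(true, |cold| inst_start(idx) < cold))
        .collect()
}

fn main() {
    // Hot code occupies offsets [0, 12); cold code was appended at offset 12.
    let ends = [4u32, 8, 12, 16, 20];
    assert_eq!(debuginfo_eligible(&ends, Some(12)), vec![0, 1, 2]);
    // With no cold blocks, every instruction keeps its debuginfo.
    assert_eq!(debuginfo_eligible(&ends, None), vec![0, 1, 2, 3, 4]);
}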

@@ -107,9 +107,10 @@ pub struct VCode<I: VCodeInst> {
/// Do we generate debug info?
generate_debug_info: bool,
/// Instruction end offsets, instruction indices at each label, and total
/// buffer size. Only present if `generate_debug_info` is set.
insts_layout: RefCell<(Vec<u32>, Vec<u32>, u32)>,
/// Instruction end offsets, instruction indices at each label,
/// total buffer size, and start of cold code. Only present if
/// `generate_debug_info` is set.
insts_layout: RefCell<InstsLayoutInfo>,
/// Constants.
constants: VCodeConstants,
@@ -119,6 +120,13 @@ pub struct VCode<I: VCodeInst> {
has_value_labels: bool,
}
#[derive(Debug, Default)]
pub(crate) struct InstsLayoutInfo {
pub(crate) inst_end_offsets: Vec<CodeOffset>,
pub(crate) label_inst_indices: Vec<CodeOffset>,
pub(crate) start_of_cold_code: Option<CodeOffset>,
}
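To make the new field concrete, here is a hypothetical value of this struct, written as a test that could sit in the same module (all numbers invented for illustration):

#[cfg(test)]
mod layout_info_example {
    use super::InstsLayoutInfo;

    #[test]
    fn example_shape() {
        // Hypothetical layout: block 0 = insts 0..2 (hot), block 1 = inst 2
        // (cold, emitted last), block 2 = insts 3..5 (hot).
        let layout = InstsLayoutInfo {
            // End offset per instruction, indexed by VCode instruction index;
            // the cold block's instruction ends past all the hot code.
            inst_end_offsets: vec![4, 8, 24, 12, 16],
            // First instruction index of each block, indexed by block/label.
            label_inst_indices: vec![0, 2, 3],
            // Machine-code offset where the out-of-line cold code begins.
            start_of_cold_code: Some(16),
        };
        // The cold block is the only reason the index -> offset map is
        // non-monotonic here.
        assert!(layout.inst_end_offsets.windows(2).any(|w| w[0] > w[1]));
    }
}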
/// A builder for a VCode function body. This builder is designed for the
/// lowering approach that we take: we traverse basic blocks in forward
/// (original IR) order, but within each basic block, we generate code from
@@ -316,7 +324,7 @@ impl<I: VCodeInst> VCode<I> {
safepoint_insns: vec![],
safepoint_slots: vec![],
generate_debug_info,
insts_layout: RefCell::new((vec![], vec![], 0)),
insts_layout: RefCell::new(Default::default()),
constants,
has_value_labels: false,
}
@@ -467,8 +475,8 @@ impl<I: VCodeInst> VCode<I> {
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex);
buffer.reserve_labels_for_constants(&self.constants);
let mut inst_ends = vec![0; self.insts.len()];
let mut label_insn_iix = vec![0; self.num_blocks()];
let mut inst_end_offsets = vec![0; self.insts.len()];
let mut label_inst_indices = vec![0; self.num_blocks()];
// Construct the final order we emit code in: cold blocks at the end.
let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
@@ -481,12 +489,14 @@ impl<I: VCodeInst> VCode<I> {
final_order.push(block);
}
}
final_order.extend(cold_blocks);
let first_cold_block = cold_blocks.first().cloned();
final_order.extend(cold_blocks.clone());
// Emit blocks.
let mut safepoint_idx = 0;
let mut cur_srcloc = None;
let mut last_offset = None;
let mut start_of_cold_code = None;
for block in final_order {
let new_offset = I::align_basic_block(buffer.cur_offset());
while new_offset > buffer.cur_offset() {
@@ -496,9 +506,13 @@ impl<I: VCodeInst> VCode<I> {
}
assert_eq!(buffer.cur_offset(), new_offset);
if Some(block) == first_cold_block {
start_of_cold_code = Some(buffer.cur_offset());
}
let (start, end) = self.block_ranges[block as usize];
buffer.bind_label(MachLabel::from_block(block));
label_insn_iix[block as usize] = start;
label_inst_indices[block as usize] = start;
if cfg_metadata {
// Track BB starts. If we have backed up due to MachBuffer
@@ -545,7 +559,7 @@ impl<I: VCodeInst> VCode<I> {
if self.generate_debug_info {
// Buffer truncation may have happened since last inst append; trim inst-end
// layout info as appropriate.
let l = &mut inst_ends[0..iix as usize];
let l = &mut inst_end_offsets[0..iix as usize];
for end in l.iter_mut().rev() {
if *end > buffer.cur_offset() {
*end = buffer.cur_offset();
@@ -553,7 +567,7 @@ impl<I: VCodeInst> VCode<I> {
break;
}
}
inst_ends[iix as usize] = buffer.cur_offset();
inst_end_offsets[iix as usize] = buffer.cur_offset();
}
}
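The backward trim above (and the identical pass after emission, below) reads naturally as a small standalone routine; a sketch assuming end offsets are recorded in emission order:

fn clamp_inst_ends(inst_end_offsets: &mut [u32], cur_offset: u32) {
    // Walk backward: once an entry already fits within the (possibly
    // truncated) buffer, every earlier entry does too, so stop.
    for end in inst_end_offsets.iter_mut().rev() {
        if *end > cur_offset {
            *end = cur_offset;
        } else {
            break;
        }
    }
}

fn main() {
    // A branch got chomped by MachBuffer, shrinking the buffer back to offset 10.
    let mut ends = vec![4, 8, 12, 16];
    clamp_inst_ends(&mut ends, 10);
    assert_eq!(ends, vec![4, 8, 10, 10]);
}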
@@ -582,14 +596,18 @@ impl<I: VCodeInst> VCode<I> {
}
if self.generate_debug_info {
for end in inst_ends.iter_mut().rev() {
for end in inst_end_offsets.iter_mut().rev() {
if *end > buffer.cur_offset() {
*end = buffer.cur_offset();
} else {
break;
}
}
*self.insts_layout.borrow_mut() = (inst_ends, label_insn_iix, buffer.cur_offset());
*self.insts_layout.borrow_mut() = InstsLayoutInfo {
inst_end_offsets,
label_inst_indices,
start_of_cold_code,
};
}
// Create `bb_edges` and final (filtered) `bb_starts`.
@@ -622,8 +640,8 @@ impl<I: VCodeInst> VCode<I> {
return ValueLabelsRanges::default();
}
let layout = &self.insts_layout.borrow();
debug::compute(&self.insts, &layout.0[..], &layout.1[..])
let layout_info = &self.insts_layout.borrow();
debug::compute(&self.insts, &*layout_info)
}
/// Get the offsets of stackslots.