Files
wasmtime/cranelift/codegen/src/machinst/debug.rs
2021-01-22 16:02:29 -08:00

501 lines
18 KiB
Rust

//! Debug info analysis: computes value-label ranges from value-label markers in
//! generated VCode.
//!
//! We "reverse-engineer" debug info like this because it is far more reliable
//! than generating it while emitting code and keeping it in sync.
//!
//! This works by (i) observing "value-label marker" instructions, which are
//! semantically just an assignment from a register to a "value label" (which
//! one can think of as another register; they represent, e.g., Wasm locals) at
//! a certain point in the code, and (ii) observing loads and stores to the
//! stack and register moves.
//!
//! We track, at every program point, the correspondence between each value
//! label and *all* locations in which it resides. E.g., if it is stored to the
//! stack, we remember that it is in both a register and the stack slot; but if
//! the register is later overwritten, then we have it just in the stack slot.
//! This allows us to avoid false-positives observing loads/stores that we think
//! are spillslots but really aren't.
//!
//! We do a standard forward dataflow analysis to compute this info.
use crate::ir::ValueLabel;
use crate::machinst::*;
use crate::value_label::{LabelValueLoc, ValueLabelsRanges, ValueLocRange};
use log::trace;
use regalloc::{Reg, RegUsageCollector};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
/// Location of a labeled value: in a register or in a stack slot. Note that a
/// value may live in more than one location; `AnalysisInfo` maps each
/// value-label to multiple `ValueLoc`s.
///
/// The type is `Copy`, `Eq` and `Hash` so that it can be stored directly in
/// the per-label `HashSet`s kept by `AnalysisInfo`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum ValueLoc {
/// The value is held in this register.
Reg(Reg),
/// Nominal-SP offset. Exported to debug-info consumers as
/// `LabelValueLoc::SPOffset` by the `From<ValueLoc>` conversion.
Stack(i64),
}
impl From<ValueLoc> for LabelValueLoc {
    /// Translate the analysis-internal location into the representation
    /// handed to debug-info consumers: registers map to `Reg`, stack offsets
    /// map to `SPOffset`.
    fn from(loc: ValueLoc) -> Self {
        match loc {
            ValueLoc::Stack(sp_off) => Self::SPOffset(sp_off),
            ValueLoc::Reg(reg) => Self::Reg(reg),
        }
    }
}
impl ValueLoc {
    /// Returns `true` when the location is a register.
    fn is_reg(self) -> bool {
        matches!(self, ValueLoc::Reg(_))
    }

    /// Returns `true` when the location is a stack slot.
    fn is_stack(self) -> bool {
        matches!(self, ValueLoc::Stack(_))
    }
}
/// Mappings at one program point.
///
/// The forward map (`label_to_locs`) and the two reverse maps
/// (`reg_to_label`, `stack_to_label`) describe the same relation from both
/// directions; the `AnalysisInfo` methods update them together.
#[derive(Clone, Debug)]
struct AnalysisInfo {
/// Nominal SP relative to real SP. If `None`, then the offset is
/// indeterminate (i.e., we merged to the lattice 'bottom' element). This
/// should not happen in well-formed code.
nominal_sp_offset: Option<i64>,
/// Forward map from labeled values to sets of locations.
label_to_locs: HashMap<ValueLabel, HashSet<ValueLoc>>,
/// Reverse map for each register indicating the value it holds, if any.
/// A register is associated with at most one label at a time.
reg_to_label: HashMap<Reg, ValueLabel>,
/// Reverse map for each stack offset indicating the value it holds, if any.
/// A stack offset is associated with at most one label at a time.
stack_to_label: HashMap<i64, ValueLabel>,
}
/// Collect every register that a machine instruction writes.
///
/// The result lists the instruction's pure definitions first, followed by the
/// registers it modifies (reads and then writes).
fn get_inst_writes<M: MachInst>(m: &M) -> Vec<Reg> {
    // TODO: expose this part of regalloc.rs's interface publicly.
    let mut reg_vecs = RegUsageCollector::get_empty_reg_vecs_test_framework_only(false);
    m.get_regs(&mut RegUsageCollector::new(&mut reg_vecs));

    let mut written = reg_vecs.defs;
    written.extend(reg_vecs.mods);
    written
}
impl AnalysisInfo {
/// Create a new analysis state. This is the "top" lattice element at which
/// the fixpoint dataflow analysis starts.
fn new() -> Self {
AnalysisInfo {
nominal_sp_offset: Some(0),
label_to_locs: HashMap::new(),
reg_to_label: HashMap::new(),
stack_to_label: HashMap::new(),
}
}
/// Remove all locations for a given labeled value. Used when the labeled
/// value is redefined (so old values become stale).
fn clear_label(&mut self, label: ValueLabel) {
if let Some(locs) = self.label_to_locs.remove(&label) {
for loc in locs {
match loc {
ValueLoc::Reg(r) => {
self.reg_to_label.remove(&r);
}
ValueLoc::Stack(off) => {
self.stack_to_label.remove(&off);
}
}
}
}
}
/// Remove a label from a register, if any. Used, e.g., if the register is
/// overwritten.
fn clear_reg(&mut self, reg: Reg) {
if let Some(label) = self.reg_to_label.remove(&reg) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.remove(&ValueLoc::Reg(reg));
}
}
}
/// Remove a label from a stack offset, if any. Used, e.g., when the stack
/// slot is overwritten.
fn clear_stack_off(&mut self, off: i64) {
if let Some(label) = self.stack_to_label.remove(&off) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.remove(&ValueLoc::Stack(off));
}
}
}
/// Indicate that a labeled value is newly defined and its new value is in
/// `reg`.
fn def_label_at_reg(&mut self, label: ValueLabel, reg: Reg) {
self.clear_label(label);
self.label_to_locs
.entry(label)
.or_insert_with(|| HashSet::new())
.insert(ValueLoc::Reg(reg));
self.reg_to_label.insert(reg, label);
}
/// Process a store from a register to a stack slot (offset).
fn store_reg(&mut self, reg: Reg, off: i64) {
self.clear_stack_off(off);
if let Some(label) = self.reg_to_label.get(&reg) {
if let Some(locs) = self.label_to_locs.get_mut(label) {
locs.insert(ValueLoc::Stack(off));
}
self.stack_to_label.insert(off, *label);
}
}
/// Process a load from a stack slot (offset) to a register.
fn load_reg(&mut self, reg: Reg, off: i64) {
self.clear_reg(reg);
if let Some(&label) = self.stack_to_label.get(&off) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.insert(ValueLoc::Reg(reg));
}
self.reg_to_label.insert(reg, label);
}
}
/// Process a move from one register to another.
fn move_reg(&mut self, to: Reg, from: Reg) {
self.clear_reg(to);
if let Some(&label) = self.reg_to_label.get(&from) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.insert(ValueLoc::Reg(to));
}
self.reg_to_label.insert(to, label);
}
}
/// Update the analysis state w.r.t. an instruction's effects. Given the
/// state just before `inst`, this method updates `self` to be the state
/// just after `inst`.
fn step<M: MachInst>(&mut self, inst: &M) {
for write in get_inst_writes(inst) {
self.clear_reg(write);
}
if let Some((label, reg)) = inst.defines_value_label() {
self.def_label_at_reg(label, reg);
}
match inst.stack_op_info() {
Some(MachInstStackOpInfo::LoadNomSPOff(reg, offset)) => {
self.load_reg(reg, offset + self.nominal_sp_offset.unwrap());
}
Some(MachInstStackOpInfo::StoreNomSPOff(reg, offset)) => {
self.store_reg(reg, offset + self.nominal_sp_offset.unwrap());
}
Some(MachInstStackOpInfo::NomSPAdj(offset)) => {
if self.nominal_sp_offset.is_some() {
self.nominal_sp_offset = Some(self.nominal_sp_offset.unwrap() + offset);
}
}
_ => {}
}
if let Some((to, from)) = inst.is_move() {
let to = to.to_reg();
self.move_reg(to, from);
}
}
}
/// Trait used to implement the dataflow analysis' meet (intersect) function
/// on the `AnalysisInfo` components. For efficiency, this is implemented as a
/// mutation on the LHS, rather than a pure functional operation.
trait IntersectFrom {
/// Intersect `other` into `self` in place and report, via the returned
/// `IntersectResult`, whether `self` was modified and whether it is now
/// "empty".
fn intersect_from(&mut self, other: &Self) -> IntersectResult;
}
/// Result of an intersection operation. Indicates whether the mutated LHS
/// (which becomes the intersection result) differs from the original LHS. Also
/// indicates if the value has become "empty" and should be removed from a
/// parent container, if any.
struct IntersectResult {
/// Did the intersection change the LHS input (the one that was mutated into
/// the result)? This is needed to drive the fixpoint loop; when no more
/// changes occur, then we have converged.
changed: bool,
/// Is the resulting value "empty"? This can be used when a container, such
/// as a map, holds values of this (intersection result) type; when
/// `is_empty` is true for the merge of the values at a particular key, we
/// can remove that key from the merged (intersected) result. This is not
/// necessary for analysis correctness but reduces the memory and runtime
/// cost of the fixpoint loop.
is_empty: bool,
}
impl IntersectFrom for AnalysisInfo {
    /// Meet of two analysis states: every component is intersected
    /// independently, and the state counts as changed if any component did.
    /// A whole state is never reported as "empty".
    fn intersect_from(&mut self, other: &Self) -> IntersectResult {
        // Each intersection mutates `self`, so all four must run
        // unconditionally; only the flags are combined afterwards.
        let sp_changed = self
            .nominal_sp_offset
            .intersect_from(&other.nominal_sp_offset)
            .changed;
        let labels_changed = self
            .label_to_locs
            .intersect_from(&other.label_to_locs)
            .changed;
        let regs_changed = self
            .reg_to_label
            .intersect_from(&other.reg_to_label)
            .changed;
        let slots_changed = self
            .stack_to_label
            .intersect_from(&other.stack_to_label)
            .changed;

        IntersectResult {
            changed: sp_changed | labels_changed | regs_changed | slots_changed,
            is_empty: false,
        }
    }
}
impl<K, V> IntersectFrom for HashMap<K, V>
where
    K: Copy + Eq + Hash,
    V: IntersectFrom,
{
    /// Intersection for hashmap: keys missing from `other` are dropped; for
    /// keys present in both maps the values are intersected recursively, and
    /// the key is dropped as well if its intersected value turns out empty.
    fn intersect_from(&mut self, other: &Self) -> IntersectResult {
        let len_before = self.len();
        let mut value_changed = false;

        self.retain(|key, value| match other.get(key) {
            // Not common to both inputs.
            None => false,
            Some(theirs) => {
                let outcome = value.intersect_from(theirs);
                value_changed |= outcome.changed;
                !outcome.is_empty
            }
        });

        IntersectResult {
            // Either some surviving/removed value changed, or a key was removed.
            changed: value_changed || self.len() != len_before,
            is_empty: self.is_empty(),
        }
    }
}
impl<T> IntersectFrom for HashSet<T>
where
    T: Copy + Eq + Hash,
{
    /// Intersection for hashset: keep only the elements that also appear in
    /// `other`.
    fn intersect_from(&mut self, other: &Self) -> IntersectResult {
        let len_before = self.len();
        self.retain(|member| other.contains(member));
        IntersectResult {
            // Elements are only ever removed, so a size change is a change.
            changed: self.len() != len_before,
            is_empty: self.is_empty(),
        }
    }
}
impl IntersectFrom for ValueLabel {
    // Meet of two labels: a label survives only when both sides agree on it.
    // This corresponds to a three-level lattice made of top, bottom, and an
    // unordered layer of individual labels in between. The LHS itself is
    // never rewritten; a mismatch is signalled through `is_empty` so that the
    // enclosing container drops the entry.
    fn intersect_from(&mut self, other: &Self) -> IntersectResult {
        let labels_differ = *self != *other;
        IntersectResult {
            is_empty: labels_differ,
            changed: false,
        }
    }
}
impl<T> IntersectFrom for Option<T>
where
    T: Copy + Eq,
{
    /// Intersection for `Option<T>`: the value is kept only if both sides are
    /// `Some` and hold equal payloads; otherwise the result is `None` (the
    /// lattice bottom).
    ///
    /// `changed` is reported only when `self` actually moves from `Some` to
    /// `None`. An LHS that is already `None` is left as-is and reported as
    /// unchanged; flagging it as changed on every call would keep re-queuing
    /// blocks in a fixpoint loop that relies on this flag to detect
    /// convergence.
    fn intersect_from(&mut self, other: &Self) -> IntersectResult {
        // A `Some` LHS differs from `other` exactly when `other` is `None`
        // or carries a different payload.
        let changed = self.is_some() && *self != *other;
        if changed {
            *self = None;
        }
        IntersectResult {
            changed,
            is_empty: self.is_none(),
        }
    }
}
/// Derive value-label ranges (which location holds each labeled value over
/// which span of code offsets) from the instructions of a compiled `VCode`.
///
/// The function runs a forward dataflow analysis over the machine
/// instructions and then replays each block once more to emit the ranges. To
/// do that it needs two pieces of layout information besides the instructions:
///
/// - `inst_ends`: for every instruction, the code offset of its end, i.e. one
///   *past* its last byte (a 4-byte instruction at offset 0 ends at 4). DWARF
///   is expressed in terms of instruction ends, and the analysis reasons about
///   locations at the program point *after* each instruction to match.
///
/// - `label_insn_index`: for a `MachLabel` with index `l`,
///   `label_insn_index[l]` is the index of the instruction before which that
///   label is bound. Branches only name their targets as labels, so this is
///   what lets the analysis find where a successor block starts.
///
/// Panics if an index derived from these slices is out of bounds, or if the
/// final pass meets a block for which the analysis recorded no entry state.
pub(crate) fn compute<I: VCodeInst>(
    insts: &[I],
    inst_ends: &[u32],
    label_insn_index: &[u32],
) -> ValueLabelsRanges {
    // An instruction starts where its predecessor ends; the first starts at 0.
    let inst_start = |idx: usize| match idx {
        0 => 0,
        _ => inst_ends[idx - 1],
    };

    trace!("compute: insts =");
    for (i, inst) in insts.iter().enumerate() {
        trace!(" #{} end: {} -> {:?}", i, inst_ends[i], inst);
    }
    trace!("label_insn_index: {:?}", label_insn_index);

    // Analysis state at the head of each block, keyed by label index. The
    // entry block (label 0) starts from the initial state.
    let mut block_starts: HashMap<u32, AnalysisInfo> = HashMap::new();
    block_starts.insert(0, AnalysisInfo::new());

    // Worklist of block labels still to be (re)processed; the companion set
    // prevents a block from being queued twice.
    let mut worklist: Vec<u32> = vec![0];
    let mut worklist_set: HashSet<u32> = HashSet::new();
    worklist_set.insert(0);

    while let Some(block) = worklist.pop() {
        worklist_set.remove(&block);

        let mut state = block_starts.get(&block).unwrap().clone();
        trace!("at block {} -> state: {:?}", block, state);

        // Walk forward from the block head until the first terminator.
        let first_inst = label_insn_index[block as usize];
        for index in first_inst..(insts.len() as u32) {
            let inst = &insts[index as usize];
            state.step(inst);
            trace!(" -> inst #{}: {:?}", index, inst);
            trace!(" --> state: {:?}", state);

            let term = inst.is_term();
            if !term.is_term() {
                continue;
            }

            // Propagate the block's exit state into every successor.
            for succ in term.get_succs() {
                let target = succ.get();
                trace!(" SUCCESSOR block {}", target);
                if let Some(succ_state) = block_starts.get_mut(&target) {
                    trace!(" orig state: {:?}", succ_state);
                    if succ_state.intersect_from(&state).changed {
                        if worklist_set.insert(target) {
                            worklist.push(target);
                        }
                        trace!(" (changed)");
                    }
                    trace!(" new state: {:?}", succ_state);
                } else {
                    // First time seeing this block
                    block_starts.insert(target, state.clone());
                    worklist.push(target);
                    worklist_set.insert(target);
                }
            }
            break;
        }
    }

    // Final pass: replay every block from its converged entry state and
    // record, after each instruction, where each label can be found.
    let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
    let inst_count = insts.len() as u32;
    for (block, &start_index) in label_insn_index.iter().enumerate() {
        // A block extends up to the next label, or to the end of the code for
        // the last label.
        let end_index = label_insn_index
            .get(block + 1)
            .copied()
            .unwrap_or(inst_count);
        let mut state = block_starts.get(&(block as u32)).unwrap().clone();

        for index in start_index..end_index {
            let idx = index as usize;
            let offset = inst_start(idx);
            let end = inst_ends[idx];
            state.step(&insts[idx]);

            for (label, locs) in &state.label_to_locs {
                trace!(" inst {} has label {:?} -> locs {:?}", index, label, locs);

                // Prefer a register location; fall back to the first stack
                // location when the value lives in no register.
                let chosen = locs
                    .iter()
                    .copied()
                    .find(|l| l.is_reg())
                    .or_else(|| locs.iter().copied().find(|l| l.is_stack()));
                let loc = match chosen {
                    Some(found) => LabelValueLoc::from(found),
                    None => continue,
                };

                let list = value_labels_ranges
                    .entry(*label)
                    .or_insert_with(Vec::new);

                // The latest entry can simply be stretched when it names the
                // same location and reaches past the start of this
                // instruction. Otherwise a new entry is appended.
                //
                // Note that `end` is one past the end of the instruction;
                // ranges are treated as end-exclusive, so a mapping valid at
                // offset 5 has start = 5, end = 6.
                let extends_last = list
                    .last()
                    .map_or(false, |last| last.end > offset && last.loc == loc);
                if extends_last {
                    list.last_mut().unwrap().end = end + 1;
                } else {
                    list.push(ValueLocRange {
                        loc,
                        start: end,
                        end: end + 1,
                    });
                }
            }
        }
    }

    trace!("ret: {:?}", value_labels_ranges);
    value_labels_ranges
}