Refactor address_transform.rs to use less memory (#1260)

The data structures in crates/debug/src/transform/address_transform.rs are unoptimized. This PR refactors the file to avoid creating intermediate heap-allocated structures, thereby improving the overall performance of the DWARF transformation.

* Reduce amount of memory allocated in translate_ranges_raw
* refactor translate_ranges
* Don't transform non-unit .debug_line
* type annotation for TransformRangeXXXIter's
* Fix empty generated wasm positions
This commit is contained in:
Yury Delendik
2020-03-23 16:36:29 -05:00
committed by GitHub
parent 2fdc7f1a8e
commit 021ebb3748
4 changed files with 233 additions and 89 deletions

View File

@@ -2,7 +2,6 @@ use crate::WasmFileInfo;
use gimli::write; use gimli::write;
use more_asserts::assert_le; use more_asserts::assert_le;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::HashMap;
use std::iter::FromIterator; use std::iter::FromIterator;
use wasmtime_environ::entity::{EntityRef, PrimaryMap}; use wasmtime_environ::entity::{EntityRef, PrimaryMap};
use wasmtime_environ::ir::SourceLoc; use wasmtime_environ::ir::SourceLoc;
@@ -50,13 +49,16 @@ struct Range {
positions: Box<[Position]>, positions: Box<[Position]>,
} }
type RangeIndex = usize;
/// Helper function address lookup data. Contains ranges start positions /// Helper function address lookup data. Contains ranges start positions
/// index and ranges data. The multiple ranges can include the same /// index and ranges data. The multiple ranges can include the same
/// original source position. The index (B-Tree) uses range start /// original source position. The index (B-Tree) uses range start
/// position as a key. /// position as a key. The index values reference the ranges array.
/// The items are ordered by RangeIndex.
#[derive(Debug)] #[derive(Debug)]
struct FuncLookup { struct FuncLookup {
index: Vec<(WasmAddress, Box<[usize]>)>, index: Vec<(WasmAddress, Box<[RangeIndex]>)>,
ranges: Box<[Range]>, ranges: Box<[Range]>,
} }
@@ -100,6 +102,7 @@ fn build_function_lookup(
let mut ranges = Vec::new(); let mut ranges = Vec::new();
let mut ranges_index = BTreeMap::new(); let mut ranges_index = BTreeMap::new();
let mut current_range = Vec::new(); let mut current_range = Vec::new();
let mut last_gen_inst_empty = false;
for t in &ft.instructions { for t in &ft.instructions {
if t.srcloc.is_default() { if t.srcloc.is_default() {
continue; continue;
@@ -125,13 +128,26 @@ fn build_function_lookup(
range_wasm_start = offset; range_wasm_start = offset;
range_gen_start = inst_gen_start; range_gen_start = inst_gen_start;
current_range = Vec::new(); current_range = Vec::new();
last_gen_inst_empty = false;
} }
if last_gen_inst_empty && current_range.last().unwrap().gen_start == inst_gen_start {
// It is possible that previous inst_gen_start == inst_gen_end, so
// make an attempt to merge all such positions with current one.
if inst_gen_start < inst_gen_end {
let last = current_range.last_mut().unwrap();
last.gen_end = inst_gen_end;
last_gen_inst_empty = false;
}
} else {
// Continue existing range: add new wasm->generated code position. // Continue existing range: add new wasm->generated code position.
current_range.push(Position { current_range.push(Position {
wasm_pos: offset, wasm_pos: offset,
gen_start: inst_gen_start, gen_start: inst_gen_start,
gen_end: inst_gen_end, gen_end: inst_gen_end,
}); });
// Track if last position was empty (see if-branch above).
last_gen_inst_empty = inst_gen_start == inst_gen_end;
}
last_wasm_pos = offset; last_wasm_pos = offset;
} }
let last_gen_addr = ft.body_offset + ft.body_len; let last_gen_addr = ft.body_offset + ft.body_len;
@@ -156,12 +172,15 @@ fn build_function_lookup(
continue; continue;
} }
if let Some(position) = last_wasm_pos { if let Some(position) = last_wasm_pos {
index.insert(position, active_ranges.clone().into_boxed_slice()); let mut sorted_ranges = active_ranges.clone();
sorted_ranges.sort();
index.insert(position, sorted_ranges.into_boxed_slice());
} }
active_ranges.retain(|r| ranges[*r].wasm_end.cmp(&wasm_start) != std::cmp::Ordering::Less); active_ranges.retain(|r| ranges[*r].wasm_end.cmp(&wasm_start) != std::cmp::Ordering::Less);
active_ranges.push(range_index); active_ranges.push(range_index);
last_wasm_pos = Some(wasm_start); last_wasm_pos = Some(wasm_start);
} }
active_ranges.sort();
index.insert(last_wasm_pos.unwrap(), active_ranges.into_boxed_slice()); index.insert(last_wasm_pos.unwrap(), active_ranges.into_boxed_slice());
let index = Vec::from_iter(index.into_iter()); let index = Vec::from_iter(index.into_iter());
(fn_start, fn_end, FuncLookup { index, ranges }) (fn_start, fn_end, FuncLookup { index, ranges })
@@ -203,14 +222,16 @@ fn build_function_addr_map(
map map
} }
struct TransformRangeIter<'a> { // Utility iterator to find all ranges starts for specific Wasm address.
addr: u64, // The iterator returns generated addresses sorted by RangeIndex.
indicies: &'a [usize], struct TransformRangeStartIter<'a> {
addr: WasmAddress,
indicies: &'a [RangeIndex],
ranges: &'a [Range], ranges: &'a [Range],
} }
impl<'a> TransformRangeIter<'a> { impl<'a> TransformRangeStartIter<'a> {
fn new(func: &'a FuncTransform, addr: u64) -> Self { fn new(func: &'a FuncTransform, addr: WasmAddress) -> Self {
let found = match func let found = match func
.lookup .lookup
.index .index
@@ -226,7 +247,7 @@ impl<'a> TransformRangeIter<'a> {
} }
}; };
if let Some(range_indices) = found { if let Some(range_indices) = found {
TransformRangeIter { TransformRangeStartIter {
addr, addr,
indicies: range_indices, indicies: range_indices,
ranges: &func.lookup.ranges, ranges: &func.lookup.ranges,
@@ -236,8 +257,9 @@ impl<'a> TransformRangeIter<'a> {
} }
} }
} }
impl<'a> Iterator for TransformRangeIter<'a> {
type Item = (usize, usize); impl<'a> Iterator for TransformRangeStartIter<'a> {
type Item = (GeneratedAddress, RangeIndex);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if let Some((first, tail)) = self.indicies.split_first() { if let Some((first, tail)) = self.indicies.split_first() {
let range_index = *first; let range_index = *first;
@@ -263,14 +285,16 @@ impl<'a> Iterator for TransformRangeIter<'a> {
} }
} }
// Utility iterator to find all range ends for a specific Wasm address.
// The iterator returns generated addresses sorted by RangeIndex.
struct TransformRangeEndIter<'a> { struct TransformRangeEndIter<'a> {
addr: u64, addr: WasmAddress,
indicies: &'a [usize], indicies: &'a [RangeIndex],
ranges: &'a [Range], ranges: &'a [Range],
} }
impl<'a> TransformRangeEndIter<'a> { impl<'a> TransformRangeEndIter<'a> {
fn new(func: &'a FuncTransform, addr: u64) -> Self { fn new(func: &'a FuncTransform, addr: WasmAddress) -> Self {
let found = match func let found = match func
.lookup .lookup
.index .index
@@ -298,7 +322,7 @@ impl<'a> TransformRangeEndIter<'a> {
} }
impl<'a> Iterator for TransformRangeEndIter<'a> { impl<'a> Iterator for TransformRangeEndIter<'a> {
type Item = (usize, usize); type Item = (GeneratedAddress, RangeIndex);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
while let Some((first, tail)) = self.indicies.split_first() { while let Some((first, tail)) = self.indicies.split_first() {
let range_index = *first; let range_index = *first;
@@ -326,6 +350,97 @@ impl<'a> Iterator for TransformRangeEndIter<'a> {
} }
} }
// Utility iterator over the translated (generated code) ranges of a function.
// It merges the output of a TransformRangeStartIter and a TransformRangeEndIter,
// pairing their items by RangeIndex, and yields (start, end) generated addresses.
pub struct TransformRangeIter<'a> {
// Function transform whose `lookup.ranges` supplies the fallback
// `gen_start`/`gen_end` when only one side of a range was found.
func: &'a FuncTransform,
// Source of (generated start address, range index) items.
start_it: TransformRangeStartIter<'a>,
// Source of (generated end address, range index) items.
end_it: TransformRangeEndIter<'a>,
// Item most recently fetched from `start_it` that has not been consumed yet;
// `None` once `start_it` is exhausted.
last_start: Option<(GeneratedAddress, RangeIndex)>,
// Item most recently fetched from `end_it` that has not been consumed yet;
// `None` once `end_it` is exhausted.
last_end: Option<(GeneratedAddress, RangeIndex)>,
}
impl<'a> TransformRangeIter<'a> {
    // Builds the merging iterator for the Wasm address span `start..end` of
    // `func`. Both underlying iterators are primed with one `next()` call so
    // that the first pending start/end items are already available when
    // iteration begins.
    fn new(func: &'a FuncTransform, start: WasmAddress, end: WasmAddress) -> Self {
        let mut starts = TransformRangeStartIter::new(func, start);
        let first_start = starts.next();

        let mut ends = TransformRangeEndIter::new(func, end);
        let first_end = ends.next();

        Self {
            func,
            start_it: starts,
            end_it: ends,
            last_start: first_start,
            last_end: first_end,
        }
    }
}
impl<'a> Iterator for TransformRangeIter<'a> {
    type Item = (GeneratedAddress, GeneratedAddress);

    // Yields the next non-empty (start, end) pair of generated addresses, or
    // `None` once both the start and the end iterators are exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Merge TransformRangeStartIter and TransformRangeEndIter data using
            // the property that both are sorted by RangeIndex: the pending item
            // with the smaller RangeIndex is handled first, and when both refer
            // to the same range they are handled together.
            let (start, end, range_index): (
                Option<GeneratedAddress>,
                Option<GeneratedAddress>,
                RangeIndex,
            ) = match (self.last_start, self.last_end) {
                // Reached ends for start and end iterators.
                (None, None) => return None,
                (Some((s, sri)), None) => (Some(s), None, sri),
                (None, Some((e, eri))) => (None, Some(e), eri),
                (Some((s, sri)), Some((e, eri))) => match sri.cmp(&eri) {
                    // Start and end RangeIndex matched.
                    std::cmp::Ordering::Equal => (Some(s), Some(e), sri),
                    std::cmp::Ordering::Less => (Some(s), None, sri),
                    std::cmp::Ordering::Greater => (None, Some(e), eri),
                },
            };

            let range_start = if let Some(range_start) = start {
                // Consume start iterator.
                self.last_start = self.start_it.next();
                debug_assert!(
                    self.last_start.is_none() || range_start < self.last_start.unwrap().0
                );
                range_start
            } else {
                // No explicit start for this range: fall back to the range's own start.
                self.func.lookup.ranges[range_index].gen_start
            };

            let range_end = if let Some(range_end) = end {
                // Consume end iterator.
                self.last_end = self.end_it.next();
                debug_assert!(self.last_end.is_none() || range_end < self.last_end.unwrap().0);
                range_end
            } else {
                // No explicit end for this range: fall back to the range's own end.
                self.func.lookup.ranges[range_index].gen_end
            };

            if range_start >= range_end {
                // Throw away empty ranges.
                debug_assert!(range_start == range_end);
                continue;
            }
            return Some((range_start, range_end));
        }
    }
}
impl AddressTransform { impl AddressTransform {
pub fn new(at: &ModuleAddressMap, wasm_file: &WasmFileInfo) -> Self { pub fn new(at: &ModuleAddressMap, wasm_file: &WasmFileInfo) -> Self {
let code_section_offset = wasm_file.code_section_offset; let code_section_offset = wasm_file.code_section_offset;
@@ -384,7 +499,7 @@ impl AddressTransform {
let map = &self.map[func.index]; let map = &self.map[func.index];
return Some((func.index, map.len)); return Some((func.index, map.len));
} }
let first_result = TransformRangeIter::new(func, addr).next(); let first_result = TransformRangeStartIter::new(func, addr).next();
first_result.map(|(address, _)| (func.index, address)) first_result.map(|(address, _)| (func.index, address))
} else { } else {
// Address was not found: function was not compiled? // Address was not found: function was not compiled?
@@ -404,56 +519,64 @@ impl AddressTransform {
}) })
} }
pub fn translate_ranges_raw( pub fn translate_ranges_raw<'a>(
&self, &'a self,
start: u64, start: u64,
end: u64, end: u64,
) -> Option<(DefinedFuncIndex, Vec<(GeneratedAddress, GeneratedAddress)>)> { ) -> Option<(DefinedFuncIndex, impl Iterator<Item = (usize, usize)> + 'a)> {
if start == 0 { if start == 0 {
// It's normally 0 for debug info without the linked code. // It's normally 0 for debug info without the linked code.
return None; return None;
} }
if let Some(func) = self.find_func(start) { if let Some(func) = self.find_func(start) {
let mut starts: HashMap<usize, usize> = let result = TransformRangeIter::new(func, start, end);
HashMap::from_iter(TransformRangeIter::new(func, start).map(|(a, r)| (r, a)));
let mut result = Vec::new();
TransformRangeEndIter::new(func, end).for_each(|(a, r)| {
let range_start = if let Some(range_start) = starts.get(&r) {
let range_start = *range_start;
starts.remove(&r);
range_start
} else {
let range = &func.lookup.ranges[r];
range.gen_start
};
result.push((range_start, a));
});
for (r, range_start) in starts {
let range = &func.lookup.ranges[r];
result.push((range_start, range.gen_end));
}
return Some((func.index, result)); return Some((func.index, result));
} }
// Address was not found: function was not compiled? // Address was not found: function was not compiled?
None None
} }
pub fn translate_ranges(&self, start: u64, end: u64) -> Vec<(write::Address, u64)> { pub fn translate_ranges<'a>(
self.translate_ranges_raw(start, end) &'a self,
.map_or(vec![], |(func_index, ranges)| { start: u64,
ranges end: u64,
.iter() ) -> impl Iterator<Item = (write::Address, u64)> + 'a {
.map(|(start, end)| { enum TranslateRangesResult<'a> {
( Empty,
write::Address::Symbol { Raw {
symbol: func_index.index(), symbol: usize,
addend: *start as i64, it: Box<dyn Iterator<Item = (usize, usize)> + 'a>,
}, },
(*end - *start) as u64, }
) impl<'a> Iterator for TranslateRangesResult<'a> {
}) type Item = (write::Address, u64);
.collect::<Vec<_>>() fn next(&mut self) -> Option<Self::Item> {
}) match self {
TranslateRangesResult::Empty => None,
TranslateRangesResult::Raw { symbol, it } => match it.next() {
Some((start, end)) => {
debug_assert!(start < end);
Some((
write::Address::Symbol {
symbol: *symbol,
addend: start as i64,
},
(end - start) as u64,
))
}
None => None,
},
}
}
}
match self.translate_ranges_raw(start, end) {
Some((func_index, ranges)) => TranslateRangesResult::Raw {
symbol: func_index.index(),
it: Box::new(ranges),
},
None => TranslateRangesResult::Empty,
}
} }
pub fn map(&self) -> &PrimaryMap<DefinedFuncIndex, FunctionMap> { pub fn map(&self) -> &PrimaryMap<DefinedFuncIndex, FunctionMap> {

View File

@@ -6,9 +6,8 @@ use gimli::{
write, DebugLine, DebugLineOffset, DebugStr, DebuggingInformationEntry, LineEncoding, Unit, write, DebugLine, DebugLineOffset, DebugStr, DebuggingInformationEntry, LineEncoding, Unit,
}; };
use more_asserts::assert_le; use more_asserts::assert_le;
use std::collections::BTreeMap;
use std::iter::FromIterator;
use wasmtime_environ::entity::EntityRef; use wasmtime_environ::entity::EntityRef;
use wasmtime_environ::wasm::DefinedFuncIndex;
#[derive(Debug)] #[derive(Debug)]
enum SavedLineProgramRow { enum SavedLineProgramRow {
@@ -28,10 +27,16 @@ enum SavedLineProgramRow {
EndOfSequence(u64), EndOfSequence(u64),
} }
// Saved line program rows of one sequence, grouped with the function the
// sequence was attributed to.
#[derive(Debug)]
struct FuncRows {
// Function index carried by the `ReadSequence` state when the sequence ended.
index: DefinedFuncIndex,
// (row address, saved row) pairs, sorted by address before being stored here.
sorted_rows: Vec<(u64, SavedLineProgramRow)>,
}
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
enum ReadLineProgramState { enum ReadLineProgramState {
SequenceEnded, SequenceEnded,
ReadSequence, ReadSequence(DefinedFuncIndex),
IgnoreSequence, IgnoreSequence,
} }
@@ -119,7 +124,8 @@ where
} }
let mut rows = program.rows(); let mut rows = program.rows();
let mut saved_rows = BTreeMap::new(); let mut func_rows = Vec::new();
let mut saved_rows: Vec<(u64, SavedLineProgramRow)> = Vec::new();
let mut state = ReadLineProgramState::SequenceEnded; let mut state = ReadLineProgramState::SequenceEnded;
while let Some((_header, row)) = rows.next_row()? { while let Some((_header, row)) = rows.next_row()? {
if state == ReadLineProgramState::IgnoreSequence { if state == ReadLineProgramState::IgnoreSequence {
@@ -129,6 +135,17 @@ where
continue; continue;
} }
let saved_row = if row.end_sequence() { let saved_row = if row.end_sequence() {
let index = match state {
ReadLineProgramState::ReadSequence(index) => index,
_ => panic!(),
};
saved_rows.sort_by_key(|r| r.0);
func_rows.push(FuncRows {
index,
sorted_rows: saved_rows,
});
saved_rows = Vec::new();
state = ReadLineProgramState::SequenceEnded; state = ReadLineProgramState::SequenceEnded;
SavedLineProgramRow::EndOfSequence(row.address()) SavedLineProgramRow::EndOfSequence(row.address())
} else { } else {
@@ -138,7 +155,16 @@ where
state = ReadLineProgramState::IgnoreSequence; state = ReadLineProgramState::IgnoreSequence;
continue; continue;
} }
state = ReadLineProgramState::ReadSequence; match addr_tr.find_func_index(row.address()) {
Some(index) => {
state = ReadLineProgramState::ReadSequence(index);
}
None => {
// Some non-existent address found.
state = ReadLineProgramState::IgnoreSequence;
continue;
}
}
} }
SavedLineProgramRow::Normal { SavedLineProgramRow::Normal {
address: row.address(), address: row.address(),
@@ -157,15 +183,21 @@ where
isa: row.isa(), isa: row.isa(),
} }
}; };
saved_rows.insert(row.address(), saved_row); saved_rows.push((row.address(), saved_row));
} }
let saved_rows = Vec::from_iter(saved_rows.into_iter()); for FuncRows {
for (i, map) in addr_tr.map() { index,
if map.len == 0 { sorted_rows: saved_rows,
} in func_rows
{
let map = match addr_tr.map().get(index) {
Some(map) if map.len > 0 => map,
_ => {
continue; // no code generated continue; // no code generated
} }
let symbol = i.index(); };
let symbol = index.index();
let base_addr = map.offset; let base_addr = map.offset;
out_program.begin_sequence(Some(write::Address::Symbol { symbol, addend: 0 })); out_program.begin_sequence(Some(write::Address::Symbol { symbol, addend: 0 }));
// TODO track and place function declaration line here // TODO track and place function declaration line here

View File

@@ -131,13 +131,7 @@ impl RangeInfoBuilder {
RangeInfoBuilder::Ranges(ranges) => { RangeInfoBuilder::Ranges(ranges) => {
let mut result = Vec::new(); let mut result = Vec::new();
for (begin, end) in ranges { for (begin, end) in ranges {
for tr in addr_tr.translate_ranges(*begin, *end) { result.extend(addr_tr.translate_ranges(*begin, *end));
if tr.1 == 0 {
// Ignore empty range
continue;
}
result.push(tr);
}
} }
if result.len() != 1 { if result.len() != 1 {
let range_list = result let range_list = result
@@ -200,16 +194,12 @@ impl RangeInfoBuilder {
let mut range_list = Vec::new(); let mut range_list = Vec::new();
for (begin, end) in ranges { for (begin, end) in ranges {
assert_lt!(begin, end); assert_lt!(begin, end);
for tr in addr_tr.translate_ranges(*begin, *end) { range_list.extend(addr_tr.translate_ranges(*begin, *end).map(|tr| {
if tr.1 == 0 { write::Range::StartLength {
// Ignore empty range
continue;
}
range_list.push(write::Range::StartLength {
begin: tr.0, begin: tr.0,
length: tr.1, length: tr.1,
});
} }
}));
} }
out_range_lists.add(write::RangeList(range_list)) out_range_lists.add(write::RangeList(range_list))
} else { } else {

View File

@@ -358,8 +358,7 @@ pub fn generate_simulated_dwarf(
write::AttributeValue::StringRef(name_id), write::AttributeValue::StringRef(name_id),
); );
let f = addr_tr.map().get(i).unwrap(); let f_start = map.addresses[0].wasm;
let f_start = f.addresses[0].wasm;
let wasm_offset = di.wasm_file.code_section_offset + f_start as u64; let wasm_offset = di.wasm_file.code_section_offset + f_start as u64;
die.set( die.set(
gimli::DW_AT_decl_file, gimli::DW_AT_decl_file,