Refactor address_transform.rs to use less memory (#1260)
The file crates/debug/src/transform/address_transform.rs is unoptimized in terms of its data structures. This PR refactors the file to avoid creating intermediate heap-allocated structures, which improves the overall performance of the DWARF transformation. * Reduce the amount of memory allocated in translate_ranges_raw * Refactor translate_ranges * Don't transform non-unit .debug_line * Add type annotations for the TransformRangeXXXIter types * Fix empty generated wasm positions
This commit is contained in:
@@ -2,7 +2,6 @@ use crate::WasmFileInfo;
|
||||
use gimli::write;
|
||||
use more_asserts::assert_le;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashMap;
|
||||
use std::iter::FromIterator;
|
||||
use wasmtime_environ::entity::{EntityRef, PrimaryMap};
|
||||
use wasmtime_environ::ir::SourceLoc;
|
||||
@@ -50,13 +49,16 @@ struct Range {
|
||||
positions: Box<[Position]>,
|
||||
}
|
||||
|
||||
type RangeIndex = usize;
|
||||
|
||||
/// Helper function address lookup data. Contains ranges start positions
|
||||
/// index and ranges data. The multiple ranges can include the same
|
||||
/// original source position. The index (B-Tree) uses range start
|
||||
/// position as a key.
|
||||
/// position as a key. The index values reference the ranges array.
|
||||
/// The items are ordered by RangeIndex.
|
||||
#[derive(Debug)]
|
||||
struct FuncLookup {
|
||||
index: Vec<(WasmAddress, Box<[usize]>)>,
|
||||
index: Vec<(WasmAddress, Box<[RangeIndex]>)>,
|
||||
ranges: Box<[Range]>,
|
||||
}
|
||||
|
||||
@@ -100,6 +102,7 @@ fn build_function_lookup(
|
||||
let mut ranges = Vec::new();
|
||||
let mut ranges_index = BTreeMap::new();
|
||||
let mut current_range = Vec::new();
|
||||
let mut last_gen_inst_empty = false;
|
||||
for t in &ft.instructions {
|
||||
if t.srcloc.is_default() {
|
||||
continue;
|
||||
@@ -125,13 +128,26 @@ fn build_function_lookup(
|
||||
range_wasm_start = offset;
|
||||
range_gen_start = inst_gen_start;
|
||||
current_range = Vec::new();
|
||||
last_gen_inst_empty = false;
|
||||
}
|
||||
if last_gen_inst_empty && current_range.last().unwrap().gen_start == inst_gen_start {
|
||||
// It is possible that previous inst_gen_start == inst_gen_end, so
|
||||
// make an attempt to merge all such positions with current one.
|
||||
if inst_gen_start < inst_gen_end {
|
||||
let last = current_range.last_mut().unwrap();
|
||||
last.gen_end = inst_gen_end;
|
||||
last_gen_inst_empty = false;
|
||||
}
|
||||
} else {
|
||||
// Continue existing range: add new wasm->generated code position.
|
||||
current_range.push(Position {
|
||||
wasm_pos: offset,
|
||||
gen_start: inst_gen_start,
|
||||
gen_end: inst_gen_end,
|
||||
});
|
||||
// Track if last position was empty (see if-branch above).
|
||||
last_gen_inst_empty = inst_gen_start == inst_gen_end;
|
||||
}
|
||||
// Continue existing range: add new wasm->generated code position.
|
||||
current_range.push(Position {
|
||||
wasm_pos: offset,
|
||||
gen_start: inst_gen_start,
|
||||
gen_end: inst_gen_end,
|
||||
});
|
||||
last_wasm_pos = offset;
|
||||
}
|
||||
let last_gen_addr = ft.body_offset + ft.body_len;
|
||||
@@ -156,12 +172,15 @@ fn build_function_lookup(
|
||||
continue;
|
||||
}
|
||||
if let Some(position) = last_wasm_pos {
|
||||
index.insert(position, active_ranges.clone().into_boxed_slice());
|
||||
let mut sorted_ranges = active_ranges.clone();
|
||||
sorted_ranges.sort();
|
||||
index.insert(position, sorted_ranges.into_boxed_slice());
|
||||
}
|
||||
active_ranges.retain(|r| ranges[*r].wasm_end.cmp(&wasm_start) != std::cmp::Ordering::Less);
|
||||
active_ranges.push(range_index);
|
||||
last_wasm_pos = Some(wasm_start);
|
||||
}
|
||||
active_ranges.sort();
|
||||
index.insert(last_wasm_pos.unwrap(), active_ranges.into_boxed_slice());
|
||||
let index = Vec::from_iter(index.into_iter());
|
||||
(fn_start, fn_end, FuncLookup { index, ranges })
|
||||
@@ -203,14 +222,16 @@ fn build_function_addr_map(
|
||||
map
|
||||
}
|
||||
|
||||
struct TransformRangeIter<'a> {
|
||||
addr: u64,
|
||||
indicies: &'a [usize],
|
||||
// Utility iterator to find all range starts for a specific Wasm address.
|
||||
// The iterator returns generated addresses sorted by RangeIndex.
|
||||
struct TransformRangeStartIter<'a> {
|
||||
addr: WasmAddress,
|
||||
indicies: &'a [RangeIndex],
|
||||
ranges: &'a [Range],
|
||||
}
|
||||
|
||||
impl<'a> TransformRangeIter<'a> {
|
||||
fn new(func: &'a FuncTransform, addr: u64) -> Self {
|
||||
impl<'a> TransformRangeStartIter<'a> {
|
||||
fn new(func: &'a FuncTransform, addr: WasmAddress) -> Self {
|
||||
let found = match func
|
||||
.lookup
|
||||
.index
|
||||
@@ -226,7 +247,7 @@ impl<'a> TransformRangeIter<'a> {
|
||||
}
|
||||
};
|
||||
if let Some(range_indices) = found {
|
||||
TransformRangeIter {
|
||||
TransformRangeStartIter {
|
||||
addr,
|
||||
indicies: range_indices,
|
||||
ranges: &func.lookup.ranges,
|
||||
@@ -236,8 +257,9 @@ impl<'a> TransformRangeIter<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> Iterator for TransformRangeIter<'a> {
|
||||
type Item = (usize, usize);
|
||||
|
||||
impl<'a> Iterator for TransformRangeStartIter<'a> {
|
||||
type Item = (GeneratedAddress, RangeIndex);
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some((first, tail)) = self.indicies.split_first() {
|
||||
let range_index = *first;
|
||||
@@ -263,14 +285,16 @@ impl<'a> Iterator for TransformRangeIter<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
// Utility iterator to find all range ends for a specific Wasm address.
|
||||
// The iterator returns generated addresses sorted by RangeIndex.
|
||||
struct TransformRangeEndIter<'a> {
|
||||
addr: u64,
|
||||
indicies: &'a [usize],
|
||||
addr: WasmAddress,
|
||||
indicies: &'a [RangeIndex],
|
||||
ranges: &'a [Range],
|
||||
}
|
||||
|
||||
impl<'a> TransformRangeEndIter<'a> {
|
||||
fn new(func: &'a FuncTransform, addr: u64) -> Self {
|
||||
fn new(func: &'a FuncTransform, addr: WasmAddress) -> Self {
|
||||
let found = match func
|
||||
.lookup
|
||||
.index
|
||||
@@ -298,7 +322,7 @@ impl<'a> TransformRangeEndIter<'a> {
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TransformRangeEndIter<'a> {
|
||||
type Item = (usize, usize);
|
||||
type Item = (GeneratedAddress, RangeIndex);
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while let Some((first, tail)) = self.indicies.split_first() {
|
||||
let range_index = *first;
|
||||
@@ -326,6 +350,97 @@ impl<'a> Iterator for TransformRangeEndIter<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
// Utility iterator over the translated ranges of a function. It yields
// (start, end) pairs of generated addresses by combining the results of a
// TransformRangeStartIter and a TransformRangeEndIter.
|
||||
pub struct TransformRangeIter<'a> {
|
||||
// Function whose lookup ranges are consulted when only one of the two
// sub-iterators has an entry for a range.
func: &'a FuncTransform,
|
||||
// Source of (generated start address, range index) items.
start_it: TransformRangeStartIter<'a>,
|
||||
// Source of (generated end address, range index) items.
end_it: TransformRangeEndIter<'a>,
|
||||
// Item most recently fetched from `start_it` that has not been consumed yet.
last_start: Option<(GeneratedAddress, RangeIndex)>,
|
||||
// Item most recently fetched from `end_it` that has not been consumed yet.
last_end: Option<(GeneratedAddress, RangeIndex)>,
|
||||
}
|
||||
|
||||
impl<'a> TransformRangeIter<'a> {
|
||||
// Creates the iterator for the Wasm addresses `start` and `end` within
// `func`. Both sub-iterators are advanced once up front so that
// `last_start` / `last_end` already hold their first items (if any).
fn new(func: &'a FuncTransform, start: WasmAddress, end: WasmAddress) -> Self {
|
||||
let mut start_it = TransformRangeStartIter::new(func, start);
|
||||
// Prefetch the first start item.
let last_start = start_it.next();
|
||||
let mut end_it = TransformRangeEndIter::new(func, end);
|
||||
// Prefetch the first end item.
let last_end = end_it.next();
|
||||
TransformRangeIter {
|
||||
func,
|
||||
start_it,
|
||||
end_it,
|
||||
last_start,
|
||||
last_end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Yields translated ranges as (start, end) pairs of generated addresses.
// The prefetched start and end items are merged by RangeIndex; ranges that
// turn out to be empty are skipped.
impl<'a> Iterator for TransformRangeIter<'a> {
|
||||
type Item = (GeneratedAddress, GeneratedAddress);
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// Loop until a non-empty range is found or both sub-iterators are exhausted.
loop {
|
||||
// Merge TransformRangeStartIter and TransformRangeEndIter data using
|
||||
// the property of FuncLookup's index field to be sorted by RangeIndex.
|
||||
let (start, end, range_index): (
|
||||
Option<GeneratedAddress>,
|
||||
Option<GeneratedAddress>,
|
||||
RangeIndex,
|
||||
) = {
|
||||
match (self.last_start.as_ref(), self.last_end.as_ref()) {
|
||||
(Some((s, sri)), Some((e, eri))) => {
|
||||
if sri == eri {
|
||||
// Start and end RangeIndex matched.
|
||||
(Some(*s), Some(*e), *sri)
|
||||
} else if sri < eri {
|
||||
// Only the start side refers to this (smaller) range index;
// the end is taken from the range itself below.
(Some(*s), None, *sri)
|
||||
} else {
|
||||
// Only the end side refers to this (smaller) range index;
// the start is taken from the range itself below.
(None, Some(*e), *eri)
|
||||
}
|
||||
}
|
||||
(Some((s, sri)), None) => (Some(*s), None, *sri),
|
||||
(None, Some((e, eri))) => (None, Some(*e), *eri),
|
||||
(None, None) => {
|
||||
// Reached ends for start and end iterators.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
};
|
||||
let range_start = match start {
|
||||
Some(range_start) => {
|
||||
// Consume start iterator.
|
||||
self.last_start = self.start_it.next();
|
||||
// Debug-only check: the next start address, if any, must be
// strictly greater than the one just consumed.
debug_assert!(
|
||||
self.last_start.is_none() || range_start < self.last_start.unwrap().0
|
||||
);
|
||||
range_start
|
||||
}
|
||||
None => {
|
||||
// No start item for this range: fall back to the range's own
// generated start address.
let range = &self.func.lookup.ranges[range_index];
|
||||
range.gen_start
|
||||
}
|
||||
};
|
||||
let range_end = match end {
|
||||
Some(range_end) => {
|
||||
// Consume end iterator.
|
||||
self.last_end = self.end_it.next();
|
||||
// Debug-only check: the next end address, if any, must be strictly
// greater than the one just consumed.
debug_assert!(self.last_end.is_none() || range_end < self.last_end.unwrap().0);
|
||||
range_end
|
||||
}
|
||||
None => {
|
||||
// No end item for this range: fall back to the range's own
// generated end address.
let range = &self.func.lookup.ranges[range_index];
|
||||
range.gen_end
|
||||
}
|
||||
};
|
||||
|
||||
if range_start < range_end {
|
||||
return Some((range_start, range_end));
|
||||
}
|
||||
// Throw away empty ranges.
|
||||
debug_assert!(range_start == range_end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AddressTransform {
|
||||
pub fn new(at: &ModuleAddressMap, wasm_file: &WasmFileInfo) -> Self {
|
||||
let code_section_offset = wasm_file.code_section_offset;
|
||||
@@ -384,7 +499,7 @@ impl AddressTransform {
|
||||
let map = &self.map[func.index];
|
||||
return Some((func.index, map.len));
|
||||
}
|
||||
let first_result = TransformRangeIter::new(func, addr).next();
|
||||
let first_result = TransformRangeStartIter::new(func, addr).next();
|
||||
first_result.map(|(address, _)| (func.index, address))
|
||||
} else {
|
||||
// Address was not found: function was not compiled?
|
||||
@@ -404,56 +519,64 @@ impl AddressTransform {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn translate_ranges_raw(
|
||||
&self,
|
||||
pub fn translate_ranges_raw<'a>(
|
||||
&'a self,
|
||||
start: u64,
|
||||
end: u64,
|
||||
) -> Option<(DefinedFuncIndex, Vec<(GeneratedAddress, GeneratedAddress)>)> {
|
||||
) -> Option<(DefinedFuncIndex, impl Iterator<Item = (usize, usize)> + 'a)> {
|
||||
if start == 0 {
|
||||
// It's normally 0 for debug info without the linked code.
|
||||
return None;
|
||||
}
|
||||
if let Some(func) = self.find_func(start) {
|
||||
let mut starts: HashMap<usize, usize> =
|
||||
HashMap::from_iter(TransformRangeIter::new(func, start).map(|(a, r)| (r, a)));
|
||||
let mut result = Vec::new();
|
||||
TransformRangeEndIter::new(func, end).for_each(|(a, r)| {
|
||||
let range_start = if let Some(range_start) = starts.get(&r) {
|
||||
let range_start = *range_start;
|
||||
starts.remove(&r);
|
||||
range_start
|
||||
} else {
|
||||
let range = &func.lookup.ranges[r];
|
||||
range.gen_start
|
||||
};
|
||||
result.push((range_start, a));
|
||||
});
|
||||
for (r, range_start) in starts {
|
||||
let range = &func.lookup.ranges[r];
|
||||
result.push((range_start, range.gen_end));
|
||||
}
|
||||
let result = TransformRangeIter::new(func, start, end);
|
||||
return Some((func.index, result));
|
||||
}
|
||||
// Address was not found: function was not compiled?
|
||||
None
|
||||
}
|
||||
|
||||
pub fn translate_ranges(&self, start: u64, end: u64) -> Vec<(write::Address, u64)> {
|
||||
self.translate_ranges_raw(start, end)
|
||||
.map_or(vec![], |(func_index, ranges)| {
|
||||
ranges
|
||||
.iter()
|
||||
.map(|(start, end)| {
|
||||
(
|
||||
write::Address::Symbol {
|
||||
symbol: func_index.index(),
|
||||
addend: *start as i64,
|
||||
},
|
||||
(*end - *start) as u64,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
// Translates the Wasm address range given by `start` and `end` into an
// iterator of (address, length) pairs. Each address is a
// `write::Address::Symbol` whose symbol is the function index and whose
// addend is the start of a generated range; the length is `end - start` of
// that generated range. The iterator is empty when `translate_ranges_raw`
// returns `None`.
pub fn translate_ranges<'a>(
|
||||
&'a self,
|
||||
start: u64,
|
||||
end: u64,
|
||||
) -> impl Iterator<Item = (write::Address, u64)> + 'a {
|
||||
// Local iterator type covering both outcomes of `translate_ranges_raw`.
enum TranslateRangesResult<'a> {
|
||||
// Nothing to translate; yields no items.
Empty,
|
||||
// Raw generated ranges that still need converting to symbol-relative form.
Raw {
|
||||
// Symbol number placed into every produced `write::Address::Symbol`.
symbol: usize,
|
||||
// Boxed raw (start, end) iterator; presumably boxed because the
// concrete type returned by `translate_ranges_raw` is opaque.
it: Box<dyn Iterator<Item = (usize, usize)> + 'a>,
|
||||
},
|
||||
}
|
||||
impl<'a> Iterator for TranslateRangesResult<'a> {
|
||||
type Item = (write::Address, u64);
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self {
|
||||
TranslateRangesResult::Empty => None,
|
||||
TranslateRangesResult::Raw { symbol, it } => match it.next() {
|
||||
Some((start, end)) => {
|
||||
// Debug-only check that the raw iterator produced a non-empty range.
debug_assert!(start < end);
|
||||
Some((
|
||||
write::Address::Symbol {
|
||||
symbol: *symbol,
|
||||
addend: start as i64,
|
||||
},
|
||||
(end - start) as u64,
|
||||
))
|
||||
}
|
||||
None => None,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match self.translate_ranges_raw(start, end) {
|
||||
Some((func_index, ranges)) => TranslateRangesResult::Raw {
|
||||
symbol: func_index.index(),
|
||||
it: Box::new(ranges),
|
||||
},
|
||||
None => TranslateRangesResult::Empty,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn map(&self) -> &PrimaryMap<DefinedFuncIndex, FunctionMap> {
|
||||
|
||||
@@ -6,9 +6,8 @@ use gimli::{
|
||||
write, DebugLine, DebugLineOffset, DebugStr, DebuggingInformationEntry, LineEncoding, Unit,
|
||||
};
|
||||
use more_asserts::assert_le;
|
||||
use std::collections::BTreeMap;
|
||||
use std::iter::FromIterator;
|
||||
use wasmtime_environ::entity::EntityRef;
|
||||
use wasmtime_environ::wasm::DefinedFuncIndex;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum SavedLineProgramRow {
|
||||
@@ -28,10 +27,16 @@ enum SavedLineProgramRow {
|
||||
EndOfSequence(u64),
|
||||
}
|
||||
|
||||
/// Saved line program rows collected for one sequence, together with the
/// index of the function the sequence was matched to.
#[derive(Debug)]
|
||||
struct FuncRows {
|
||||
/// Index of the function the rows belong to.
index: DefinedFuncIndex,
|
||||
/// Saved rows paired with their address (the `u64` key); the vector is
/// sorted by that key before it is stored here.
sorted_rows: Vec<(u64, SavedLineProgramRow)>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
enum ReadLineProgramState {
|
||||
SequenceEnded,
|
||||
ReadSequence,
|
||||
ReadSequence(DefinedFuncIndex),
|
||||
IgnoreSequence,
|
||||
}
|
||||
|
||||
@@ -119,7 +124,8 @@ where
|
||||
}
|
||||
|
||||
let mut rows = program.rows();
|
||||
let mut saved_rows = BTreeMap::new();
|
||||
let mut func_rows = Vec::new();
|
||||
let mut saved_rows: Vec<(u64, SavedLineProgramRow)> = Vec::new();
|
||||
let mut state = ReadLineProgramState::SequenceEnded;
|
||||
while let Some((_header, row)) = rows.next_row()? {
|
||||
if state == ReadLineProgramState::IgnoreSequence {
|
||||
@@ -129,6 +135,17 @@ where
|
||||
continue;
|
||||
}
|
||||
let saved_row = if row.end_sequence() {
|
||||
let index = match state {
|
||||
ReadLineProgramState::ReadSequence(index) => index,
|
||||
_ => panic!(),
|
||||
};
|
||||
saved_rows.sort_by_key(|r| r.0);
|
||||
func_rows.push(FuncRows {
|
||||
index,
|
||||
sorted_rows: saved_rows,
|
||||
});
|
||||
|
||||
saved_rows = Vec::new();
|
||||
state = ReadLineProgramState::SequenceEnded;
|
||||
SavedLineProgramRow::EndOfSequence(row.address())
|
||||
} else {
|
||||
@@ -138,7 +155,16 @@ where
|
||||
state = ReadLineProgramState::IgnoreSequence;
|
||||
continue;
|
||||
}
|
||||
state = ReadLineProgramState::ReadSequence;
|
||||
match addr_tr.find_func_index(row.address()) {
|
||||
Some(index) => {
|
||||
state = ReadLineProgramState::ReadSequence(index);
|
||||
}
|
||||
None => {
|
||||
// Some non-existent address found.
|
||||
state = ReadLineProgramState::IgnoreSequence;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
SavedLineProgramRow::Normal {
|
||||
address: row.address(),
|
||||
@@ -157,15 +183,21 @@ where
|
||||
isa: row.isa(),
|
||||
}
|
||||
};
|
||||
saved_rows.insert(row.address(), saved_row);
|
||||
saved_rows.push((row.address(), saved_row));
|
||||
}
|
||||
|
||||
let saved_rows = Vec::from_iter(saved_rows.into_iter());
|
||||
for (i, map) in addr_tr.map() {
|
||||
if map.len == 0 {
|
||||
continue; // no code generated
|
||||
}
|
||||
let symbol = i.index();
|
||||
for FuncRows {
|
||||
index,
|
||||
sorted_rows: saved_rows,
|
||||
} in func_rows
|
||||
{
|
||||
let map = match addr_tr.map().get(index) {
|
||||
Some(map) if map.len > 0 => map,
|
||||
_ => {
|
||||
continue; // no code generated
|
||||
}
|
||||
};
|
||||
let symbol = index.index();
|
||||
let base_addr = map.offset;
|
||||
out_program.begin_sequence(Some(write::Address::Symbol { symbol, addend: 0 }));
|
||||
// TODO track and place function declaration line here
|
||||
|
||||
@@ -131,13 +131,7 @@ impl RangeInfoBuilder {
|
||||
RangeInfoBuilder::Ranges(ranges) => {
|
||||
let mut result = Vec::new();
|
||||
for (begin, end) in ranges {
|
||||
for tr in addr_tr.translate_ranges(*begin, *end) {
|
||||
if tr.1 == 0 {
|
||||
// Ignore empty range
|
||||
continue;
|
||||
}
|
||||
result.push(tr);
|
||||
}
|
||||
result.extend(addr_tr.translate_ranges(*begin, *end));
|
||||
}
|
||||
if result.len() != 1 {
|
||||
let range_list = result
|
||||
@@ -200,16 +194,12 @@ impl RangeInfoBuilder {
|
||||
let mut range_list = Vec::new();
|
||||
for (begin, end) in ranges {
|
||||
assert_lt!(begin, end);
|
||||
for tr in addr_tr.translate_ranges(*begin, *end) {
|
||||
if tr.1 == 0 {
|
||||
// Ignore empty range
|
||||
continue;
|
||||
}
|
||||
range_list.push(write::Range::StartLength {
|
||||
range_list.extend(addr_tr.translate_ranges(*begin, *end).map(|tr| {
|
||||
write::Range::StartLength {
|
||||
begin: tr.0,
|
||||
length: tr.1,
|
||||
});
|
||||
}
|
||||
}
|
||||
}));
|
||||
}
|
||||
out_range_lists.add(write::RangeList(range_list))
|
||||
} else {
|
||||
|
||||
@@ -358,8 +358,7 @@ pub fn generate_simulated_dwarf(
|
||||
write::AttributeValue::StringRef(name_id),
|
||||
);
|
||||
|
||||
let f = addr_tr.map().get(i).unwrap();
|
||||
let f_start = f.addresses[0].wasm;
|
||||
let f_start = map.addresses[0].wasm;
|
||||
let wasm_offset = di.wasm_file.code_section_offset + f_start as u64;
|
||||
die.set(
|
||||
gimli::DW_AT_decl_file,
|
||||
|
||||
Reference in New Issue
Block a user