Don't re-parse wasm for debuginfo (#2085)

* Don't re-parse wasm for debuginfo

This commit updates debuginfo parsing to happen during the main
translation of the original wasm module. This avoids parsing the wasm
module twice (at least the section-level headers). Additionally this
ties debuginfo directly to a `ModuleTranslation` which makes it easier
to process debuginfo for nested modules in the upcoming module linking
proposal.

The changes here are summarized by taking the `read_debuginfo` function
and merging it with the main module translation, which is driven by
cranelift. Some new hooks were added to the module environment trait to
support this, but most of the work was integrating with existing hooks.

* Fix tests in debug crate
This commit is contained in:
Alex Crichton
2020-08-03 09:59:20 -05:00
committed by GitHub
parent e108f14620
commit 026fb8d388
18 changed files with 261 additions and 377 deletions

View File

@@ -6,11 +6,9 @@ use anyhow::{bail, ensure, Error};
use object::{RelocationEncoding, RelocationKind};
use std::collections::HashMap;
pub use crate::read_debuginfo::{read_debuginfo, DebugInfoData, WasmFileInfo};
pub use crate::write_debuginfo::{emit_dwarf, DwarfSection, DwarfSectionRelocTarget};
mod gc;
mod read_debuginfo;
mod transform;
mod write_debuginfo;

View File

@@ -1,244 +0,0 @@
use anyhow::{bail, Result};
use gimli::{
DebugAbbrev, DebugAddr, DebugInfo, DebugLine, DebugLineStr, DebugLoc, DebugLocLists,
DebugRanges, DebugRngLists, DebugStr, DebugStrOffsets, DebugTypes, EndianSlice, LittleEndian,
LocationLists, RangeLists,
};
use std::collections::HashMap;
use std::path::PathBuf;
use wasmparser::{self, NameSectionReader, Parser, Payload, TypeDef};
/// Marker trait for `gimli` readers whose offsets are `usize` and whose byte order is
/// little-endian. It declares no methods of its own.
trait Reader: gimli::Reader<Offset = usize, Endian = LittleEndian> {}
// Borrowed little-endian slices satisfy the bounds above.
impl<'input> Reader for gimli::EndianSlice<'input, LittleEndian> {}
pub use wasmparser::Type as WasmType;
/// `gimli::Dwarf` specialised to borrowed little-endian byte slices that live for `'input`.
pub type Dwarf<'input> = gimli::Dwarf<gimli::EndianSlice<'input, LittleEndian>>;
/// Parameter and local-variable information for one function that has a body in the module.
#[derive(Debug)]
pub struct FunctionMetadata {
/// Parameter types, copied from the function's signature in the type section.
pub params: Box<[WasmType]>,
/// Entries yielded by the function body's locals reader.
/// NOTE(review): presumably `(count, type)` pairs — confirm against wasmparser.
pub locals: Box<[(u32, WasmType)]>,
}
/// Facts about the wasm binary itself that are gathered while scanning its sections.
#[derive(Debug)]
pub struct WasmFileInfo {
/// Path of the wasm file, if known. `read_debuginfo` always leaves this as `None`.
pub path: Option<PathBuf>,
/// Start of the code section's byte range as reported by the parser; `0` if the module
/// has no code section.
pub code_section_offset: u64,
/// Number of entries in the import section that import a function.
pub imported_func_count: u32,
/// One entry per function that has both a function-section entry and a code-section body.
pub funcs: Box<[FunctionMetadata]>,
}
/// Owned copy of the names found in a wasm `name` custom section.
#[derive(Debug)]
pub struct NameSection {
/// Module name, if the section contained a module-name subsection.
pub module_name: Option<String>,
/// Function names keyed by the index recorded in each naming entry.
pub func_names: HashMap<u32, String>,
/// Local names: outer key is the function index, inner key is the index recorded in each
/// naming entry for that function.
pub locals_names: HashMap<u32, HashMap<u32, String>>,
}
/// Everything `read_debuginfo` extracts from a wasm binary; borrows section bytes for `'a`.
#[derive(Debug)]
pub struct DebugInfoData<'a> {
/// DWARF sections assembled from the module's `.debug_*` custom sections.
pub dwarf: Dwarf<'a>,
/// Parsed `name` section; `None` when the section is absent or could not be parsed.
pub name_section: Option<NameSection>,
/// Section offsets, import counts and per-function metadata of the wasm binary.
pub wasm_file: WasmFileInfo,
}
/// Assembles a [`Dwarf`] from the `.debug_*` custom sections collected from a module.
///
/// `sections` maps a custom-section name (for example `".debug_info"`) to its raw bytes.
/// Any section that is missing from the map is treated as empty. The supplementary string
/// section and the types section are always left empty.
///
/// # Errors
///
/// Returns an error if `sections` contains a `.debug_types` entry.
fn convert_sections<'a>(sections: HashMap<&str, &'a [u8]>) -> Result<Dwarf<'a>> {
    const EMPTY_SECTION: &[u8] = &[];

    // Nothing below can fail, so rejecting the unsupported section up front is equivalent
    // to rejecting it after the other sections have been wrapped.
    if sections.contains_key(".debug_types") {
        bail!("Unexpected .debug_types");
    }

    let endian = LittleEndian;
    // Raw bytes of the named section, or an empty slice when the module did not carry it.
    let raw = |name: &str| -> &'a [u8] { sections.get(name).copied().unwrap_or(EMPTY_SECTION) };
    // Same lookup, wrapped for the gimli types that are built from an `EndianSlice`.
    let slice = |name: &str| EndianSlice::new(raw(name), endian);

    let ranges = RangeLists::new(
        DebugRanges::new(raw(".debug_ranges"), endian),
        DebugRngLists::new(raw(".debug_rnglists"), endian),
    );
    let locations = LocationLists::new(
        DebugLoc::new(raw(".debug_loc"), endian),
        DebugLocLists::new(raw(".debug_loclists"), endian),
    );

    Ok(Dwarf {
        debug_abbrev: DebugAbbrev::new(raw(".debug_abbrev"), endian),
        debug_addr: DebugAddr::from(slice(".debug_addr")),
        debug_info: DebugInfo::new(raw(".debug_info"), endian),
        debug_line: DebugLine::new(raw(".debug_line"), endian),
        debug_line_str: DebugLineStr::from(slice(".debug_line_str")),
        debug_str: DebugStr::new(raw(".debug_str"), endian),
        debug_str_offsets: DebugStrOffsets::from(slice(".debug_str_offsets")),
        debug_str_sup: DebugStr::from(EndianSlice::new(EMPTY_SECTION, endian)),
        debug_types: DebugTypes::from(EndianSlice::new(EMPTY_SECTION, endian)),
        locations,
        ranges,
    })
}
/// Copies the module, function and local names out of a wasm `name` custom section.
///
/// # Errors
///
/// Propagates failures from the subsection iterator and from opening a subsection's
/// reader or map. A failure while reading an individual entry is not reported: it simply
/// ends the loop over that subsection, keeping whatever was read so far.
fn read_name_section(reader: wasmparser::NameSectionReader) -> wasmparser::Result<NameSection> {
    let mut section = NameSection {
        module_name: None,
        func_names: HashMap::new(),
        locals_names: HashMap::new(),
    };

    for subsection in reader.into_iter() {
        match subsection? {
            wasmparser::Name::Module(module) => {
                section.module_name = Some(module.get_name()?.to_owned());
            }
            wasmparser::Name::Function(functions) => {
                let mut namings = functions.get_map()?;
                // The loop ends on the first failed read.
                while let Ok(entry) = namings.read() {
                    section.func_names.insert(entry.index, entry.name.to_owned());
                }
            }
            wasmparser::Name::Local(locals) => {
                let mut per_function = locals.get_function_local_reader()?;
                while let Ok(function) = per_function.read() {
                    let mut namings = function.get_map()?;
                    let mut names = HashMap::new();
                    while let Ok(entry) = namings.read() {
                        names.insert(entry.index, entry.name.to_owned());
                    }
                    section.locals_names.insert(function.func_index, names);
                }
            }
        }
    }

    Ok(section)
}
pub fn read_debuginfo(data: &[u8]) -> Result<DebugInfoData> {
let mut sections = HashMap::new();
let mut name_section = None;
let mut code_section_offset = 0;
let mut imported_func_count = 0;
let mut signatures_params: Vec<Box<[WasmType]>> = Vec::new();
let mut func_params_refs: Vec<usize> = Vec::new();
let mut func_locals: Vec<Box<[(u32, WasmType)]>> = Vec::new();
for payload in Parser::new(0).parse_all(data) {
match payload? {
Payload::CustomSection {
name,
data,
data_offset,
} => {
if name.starts_with(".debug_") {
sections.insert(name, data);
} else if name == "name" {
if let Ok(reader) = NameSectionReader::new(data, data_offset) {
if let Ok(section) = read_name_section(reader) {
name_section = Some(section);
}
}
}
}
Payload::TypeSection(s) => {
signatures_params = s
.into_iter()
.map(|ft| {
if let Ok(TypeDef::Func(ft)) = ft {
Ok(ft.params)
} else {
unimplemented!("module linking not implemented yet")
}
})
.collect::<Result<Vec<_>>>()?;
}
Payload::ImportSection(s) => {
for i in s {
if let wasmparser::ImportSectionEntryType::Function(_) = i?.ty {
imported_func_count += 1;
}
}
}
Payload::FunctionSection(s) => {
func_params_refs = s
.into_iter()
.map(|index| Ok(index? as usize))
.collect::<Result<Vec<_>>>()?;
}
Payload::CodeSectionStart { range, .. } => {
code_section_offset = range.start as u64;
}
Payload::CodeSectionEntry(body) => {
let locals = body.get_locals_reader()?;
let locals = locals
.into_iter()
.collect::<Result<Vec<_>, _>>()?
.into_boxed_slice();
func_locals.push(locals);
}
_ => (),
}
}
let func_meta = func_params_refs
.into_iter()
.zip(func_locals.into_iter())
.map(|(params_index, locals)| FunctionMetadata {
params: signatures_params[params_index].clone(),
locals,
})
.collect::<Vec<_>>();
let dwarf = convert_sections(sections)?;
Ok(DebugInfoData {
dwarf,
name_section,
wasm_file: WasmFileInfo {
path: None,
code_section_offset,
imported_func_count,
funcs: func_meta.into_boxed_slice(),
},
})
}

View File

@@ -1,4 +1,3 @@
use crate::WasmFileInfo;
use gimli::write;
use more_asserts::assert_le;
use std::collections::BTreeMap;
@@ -6,6 +5,7 @@ use std::iter::FromIterator;
use wasmtime_environ::entity::{EntityRef, PrimaryMap};
use wasmtime_environ::ir::SourceLoc;
use wasmtime_environ::wasm::DefinedFuncIndex;
use wasmtime_environ::WasmFileInfo;
use wasmtime_environ::{FunctionAddressMap, ModuleAddressMap};
pub type GeneratedAddress = usize;
@@ -602,11 +602,11 @@ impl AddressTransform {
#[cfg(test)]
mod tests {
use super::{build_function_lookup, get_wasm_code_offset, AddressTransform};
use crate::read_debuginfo::WasmFileInfo;
use gimli::write::Address;
use std::iter::FromIterator;
use wasmtime_environ::entity::PrimaryMap;
use wasmtime_environ::ir::SourceLoc;
use wasmtime_environ::WasmFileInfo;
use wasmtime_environ::{FunctionAddressMap, InstructionAddressMap, ModuleAddressMap};
#[test]
@@ -724,7 +724,7 @@ mod tests {
path: None,
code_section_offset: 1,
imported_func_count: 0,
funcs: Box::new([]),
funcs: Vec::new(),
},
);

View File

@@ -683,9 +683,9 @@ mod tests {
}
fn create_mock_address_transform() -> AddressTransform {
use crate::read_debuginfo::WasmFileInfo;
use wasmtime_environ::entity::PrimaryMap;
use wasmtime_environ::ir::SourceLoc;
use wasmtime_environ::WasmFileInfo;
use wasmtime_environ::{FunctionAddressMap, InstructionAddressMap};
let mut module_map = PrimaryMap::new();
let code_section_offset: u32 = 100;
@@ -709,7 +709,7 @@ mod tests {
});
let fi = WasmFileInfo {
code_section_offset: code_section_offset.into(),
funcs: Box::new([]),
funcs: Vec::new(),
imported_func_count: 0,
path: None,
};

View File

@@ -2,7 +2,6 @@ use self::refs::DebugInfoRefsMap;
use self::simulate::generate_simulated_dwarf;
use self::unit::clone_unit;
use crate::gc::build_dependencies;
use crate::DebugInfoData;
use anyhow::Error;
use gimli::{
write, DebugAddr, DebugLine, DebugLineStr, DebugStr, DebugStrOffsets, LocationLists,
@@ -11,6 +10,7 @@ use gimli::{
use std::collections::HashSet;
use thiserror::Error;
use wasmtime_environ::isa::TargetIsa;
use wasmtime_environ::DebugInfoData;
use wasmtime_environ::{ModuleAddressMap, ModuleVmctxInfo, ValueLabelsRanges};
pub use address_transform::AddressTransform;

View File

@@ -1,18 +1,19 @@
use super::expression::{CompiledExpression, FunctionFrameInfo};
use super::utils::{add_internal_types, append_vmctx_info, get_function_frame_info};
use super::AddressTransform;
use crate::read_debuginfo::WasmFileInfo;
use anyhow::{Context, Error};
use gimli::write;
use gimli::{self, LineEncoding};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
use wasmparser::Type as WasmType;
use wasmtime_environ::entity::EntityRef;
use wasmtime_environ::wasm::{get_vmctx_value_label, DefinedFuncIndex};
use wasmtime_environ::{ModuleVmctxInfo, ValueLabelsRanges};
pub use crate::read_debuginfo::{DebugInfoData, FunctionMetadata, WasmType};
use wasmtime_environ::isa::TargetIsa;
use wasmtime_environ::wasm::{get_vmctx_value_label, DefinedFuncIndex};
use wasmtime_environ::WasmFileInfo;
use wasmtime_environ::{DebugInfoData, FunctionMetadata};
use wasmtime_environ::{ModuleVmctxInfo, ValueLabelsRanges};
const PRODUCER_NAME: &str = "wasmtime";
@@ -87,7 +88,7 @@ fn generate_line_info(
Ok(out_program)
}
fn check_invalid_chars_in_name(s: String) -> Option<String> {
fn check_invalid_chars_in_name(s: &str) -> Option<&str> {
if s.contains('\x00') {
None
} else {
@@ -96,16 +97,13 @@ fn check_invalid_chars_in_name(s: String) -> Option<String> {
}
fn autogenerate_dwarf_wasm_path(di: &DebugInfoData) -> PathBuf {
static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
let module_name = di
.name_section
.as_ref()
.and_then(|ns| ns.module_name.to_owned())
.module_name
.and_then(check_invalid_chars_in_name)
.unwrap_or_else(|| unsafe {
static mut GEN_ID: u32 = 0;
GEN_ID += 1;
format!("<gen-{}>", GEN_ID)
});
.map(|s| s.to_string())
.unwrap_or_else(|| format!("<gen-{}>", NEXT_ID.fetch_add(1, SeqCst)));
let path = format!("/<wasm-module>/{}.wasm", module_name);
PathBuf::from(path)
}
@@ -195,7 +193,7 @@ fn generate_vars(
scope_ranges: &[(u64, u64)],
wasm_types: &WasmTypesDieRefs,
func_meta: &FunctionMetadata,
locals_names: Option<&HashMap<u32, String>>,
locals_names: Option<&HashMap<u32, &str>>,
out_strings: &mut write::StringTable,
isa: &dyn TargetIsa,
) -> Result<(), Error> {
@@ -253,7 +251,7 @@ fn generate_vars(
let name_id = match locals_names
.and_then(|m| m.get(&(var_index as u32)))
.and_then(|s| check_invalid_chars_in_name(s.to_owned()))
.and_then(|s| check_invalid_chars_in_name(s))
{
Some(n) => out_strings.add(assert_dwarf_str!(n)),
None => out_strings.add(format!("var{}", var_index)),
@@ -297,14 +295,8 @@ pub fn generate_simulated_dwarf(
.and_then(check_invalid_chars_in_path)
.unwrap_or_else(|| autogenerate_dwarf_wasm_path(di));
let (func_names, locals_names) = if let Some(ref name_section) = di.name_section {
(
Some(&name_section.func_names),
Some(&name_section.locals_names),
)
} else {
(None, None)
};
let func_names = &di.name_section.func_names;
let locals_names = &di.name_section.locals_names;
let imported_func_count = di.wasm_file.imported_func_count;
let (unit, root_id, name_id) = {
@@ -376,8 +368,8 @@ pub fn generate_simulated_dwarf(
let func_index = imported_func_count + (index as u32);
let id = match func_names
.and_then(|m| m.get(&func_index))
.and_then(|s| check_invalid_chars_in_name(s.to_owned()))
.get(&func_index)
.and_then(|s| check_invalid_chars_in_name(s))
{
Some(n) => out_strings.add(assert_dwarf_str!(n)),
None => out_strings.add(format!("wasm-function[{}]", func_index)),
@@ -407,7 +399,7 @@ pub fn generate_simulated_dwarf(
&[(source_range.0, source_range.1)],
&wasm_types,
&di.wasm_file.funcs[index],
locals_names.and_then(|m| m.get(&(index as u32))),
locals_names.get(&(index as u32)),
out_strings,
isa,
)?;

View File

@@ -1,10 +1,10 @@
pub use crate::read_debuginfo::{read_debuginfo, DebugInfoData, WasmFileInfo};
pub use crate::transform::transform_dwarf;
use gimli::write::{Address, Dwarf, EndianVec, FrameTable, Result, Sections, Writer};
use gimli::{RunTimeEndian, SectionId};
use wasmtime_environ::entity::{EntityRef, PrimaryMap};
use wasmtime_environ::isa::{unwind::UnwindInfo, TargetIsa};
use wasmtime_environ::wasm::DefinedFuncIndex;
use wasmtime_environ::DebugInfoData;
use wasmtime_environ::{ModuleAddressMap, ModuleVmctxInfo, ValueLabelsRanges};
#[derive(Clone)]