From 026fb8d388964c7c1bace7019c4fe0d63c584560 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 3 Aug 2020 09:59:20 -0500 Subject: [PATCH] Don't re-parse wasm for debuginfo (#2085) * Don't re-parse wasm for debuginfo This commit updates debuginfo parsing to happen during the main translation of the original wasm module. This avoids parsing the wasm module twice (at least the section-level headers). Additionally this ties debuginfo directly to a `ModuleTranslation` which makes it easier to process debuginfo for nested modules in the upcoming module linking proposal. The changes here can be summarized as taking the `read_debuginfo` function and merging it with the main module translation, which is driven by cranelift. Some new hooks were added to the module environment trait to support this, but most of it was integrating with existing hooks. * Fix tests in debug crate --- Cargo.lock | 1 + cranelift/wasm/src/environ/dummy.rs | 6 +- cranelift/wasm/src/environ/spec.rs | 36 ++- cranelift/wasm/src/module_translator.rs | 14 +- cranelift/wasm/src/sections_translator.rs | 71 +++-- crates/debug/src/lib.rs | 2 - crates/debug/src/read_debuginfo.rs | 244 ------------------ .../debug/src/transform/address_transform.rs | 6 +- crates/debug/src/transform/expression.rs | 4 +- crates/debug/src/transform/mod.rs | 2 +- crates/debug/src/transform/simulate.rs | 44 ++-- crates/debug/src/write_debuginfo.rs | 2 +- crates/environ/Cargo.toml | 1 + crates/environ/src/lib.rs | 5 +- crates/environ/src/module_environ.rs | 168 ++++++++++-- crates/jit/src/compiler.rs | 13 +- crates/jit/src/instantiate.rs | 14 +- src/obj.rs | 5 +- 18 files changed, 261 insertions(+), 377 deletions(-) delete mode 100644 crates/debug/src/read_debuginfo.rs diff --git a/Cargo.lock b/Cargo.lock index 5235a3861e..4bc45bbdef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2466,6 +2466,7 @@ dependencies = [ "errno", "file-per-thread-logger", "filetime", + "gimli 0.21.0", "indexmap", "lazy_static", "libc", diff 
--git a/cranelift/wasm/src/environ/dummy.rs b/cranelift/wasm/src/environ/dummy.rs index 9047df05d2..5b64254fa3 100644 --- a/cranelift/wasm/src/environ/dummy.rs +++ b/cranelift/wasm/src/environ/dummy.rs @@ -743,13 +743,11 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment { Ok(()) } - fn declare_module_name(&mut self, name: &'data str) -> WasmResult<()> { + fn declare_module_name(&mut self, name: &'data str) { self.module_name = Some(String::from(name)); - Ok(()) } - fn declare_func_name(&mut self, func_index: FuncIndex, name: &'data str) -> WasmResult<()> { + fn declare_func_name(&mut self, func_index: FuncIndex, name: &'data str) { self.function_names[func_index] = String::from(name); - Ok(()) } } diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs index 9a3498b05e..56d86522b1 100644 --- a/cranelift/wasm/src/environ/spec.rs +++ b/cranelift/wasm/src/environ/spec.rs @@ -66,6 +66,20 @@ impl TryFrom for WasmType { } } +impl From for wasmparser::Type { + fn from(ty: WasmType) -> wasmparser::Type { + match ty { + WasmType::I32 => wasmparser::Type::I32, + WasmType::I64 => wasmparser::Type::I64, + WasmType::F32 => wasmparser::Type::F32, + WasmType::F64 => wasmparser::Type::F64, + WasmType::V128 => wasmparser::Type::V128, + WasmType::FuncRef => wasmparser::Type::FuncRef, + WasmType::ExternRef => wasmparser::Type::ExternRef, + } + } +} + /// WebAssembly function type -- equivalent of `wasmparser`'s FuncType. #[derive(Debug, Clone, Eq, PartialEq, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] @@ -743,10 +757,13 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment { /// Declare a passive data segment. fn declare_passive_data(&mut self, data_index: DataIndex, data: &'data [u8]) -> WasmResult<()>; + /// Indicates how many functions the code section reports and the byte + /// offset of where the code sections starts. 
+ fn reserve_function_bodies(&mut self, bodies: u32, code_section_offset: u64) { + drop((bodies, code_section_offset)); + } + /// Provides the contents of a function body. - /// - /// Note there's no `reserve_function_bodies` function because the number of - /// functions is already provided by `reserve_func_types`. fn define_function_body( &mut self, module_translation_state: &ModuleTranslationState, @@ -773,16 +790,19 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment { /// /// By default this does nothing, but implementations can use this to read /// the module name subsection of the custom name section if desired. - fn declare_module_name(&mut self, _name: &'data str) -> WasmResult<()> { - Ok(()) - } + fn declare_module_name(&mut self, _name: &'data str) {} /// Declares the name of a function to the environment. /// /// By default this does nothing, but implementations can use this to read /// the function name subsection of the custom name section if desired. - fn declare_func_name(&mut self, _func_index: FuncIndex, _name: &'data str) -> WasmResult<()> { - Ok(()) + fn declare_func_name(&mut self, _func_index: FuncIndex, _name: &'data str) {} + + /// Declares the name of a function's local to the environment. + /// + /// By default this does nothing, but implementations can use this to read + /// the local name subsection of the custom name section if desired. + fn declare_local_name(&mut self, _func_index: FuncIndex, _local_index: u32, _name: &'data str) { } /// Indicates that a custom section has been found in the wasm file diff --git a/cranelift/wasm/src/module_translator.rs b/cranelift/wasm/src/module_translator.rs index 9903a2979c..e3cd0b44bf 100644 --- a/cranelift/wasm/src/module_translator.rs +++ b/cranelift/wasm/src/module_translator.rs @@ -59,7 +59,10 @@ pub fn translate_module<'data>( parse_element_section(elements, environ)?; } - Payload::CodeSectionStart { .. } => {} + Payload::CodeSectionStart { count, range, .. 
} => { + environ.reserve_function_bodies(count, range.start as u64); + } + Payload::CodeSectionEntry(code) => { let mut code = code.get_binary_reader(); let size = code.bytes_remaining(); @@ -91,7 +94,14 @@ pub fn translate_module<'data>( name: "name", data, data_offset, - } => parse_name_section(NameSectionReader::new(data, data_offset)?, environ)?, + } => { + let result = NameSectionReader::new(data, data_offset) + .map_err(|e| e.into()) + .and_then(|s| parse_name_section(s, environ)); + if let Err(e) = result { + log::warn!("failed to parse name section {:?}", e); + } + } Payload::CustomSection { name, data, .. } => environ.custom_section(name, data)?, diff --git a/cranelift/wasm/src/sections_translator.rs b/cranelift/wasm/src/sections_translator.rs index 3bc4d7d144..3267822526 100644 --- a/cranelift/wasm/src/sections_translator.rs +++ b/cranelift/wasm/src/sections_translator.rs @@ -13,7 +13,7 @@ use crate::translation_utils::{ tabletype_to_type, type_to_type, DataIndex, ElemIndex, FuncIndex, Global, GlobalIndex, GlobalInit, Memory, MemoryIndex, SignatureIndex, Table, TableElementType, TableIndex, }; -use crate::{wasm_unsupported, HashMap}; +use crate::wasm_unsupported; use core::convert::TryFrom; use core::convert::TryInto; use cranelift_codegen::ir::immediates::V128Imm; @@ -26,8 +26,8 @@ use wasmparser::{ self, Data, DataKind, DataSectionReader, Element, ElementItem, ElementItems, ElementKind, ElementSectionReader, Export, ExportSectionReader, ExternalKind, FunctionSectionReader, GlobalSectionReader, GlobalType, ImportSectionEntryType, ImportSectionReader, - MemorySectionReader, MemoryType, NameSectionReader, Naming, NamingReader, Operator, - TableSectionReader, Type, TypeDef, TypeSectionReader, + MemorySectionReader, MemoryType, NameSectionReader, Naming, Operator, TableSectionReader, Type, + TypeDef, TypeSectionReader, }; /// Parses the Type section of the wasm module. 
@@ -404,53 +404,40 @@ pub fn parse_data_section<'data>( /// Parses the Name section of the wasm module. pub fn parse_name_section<'data>( - mut names: NameSectionReader<'data>, + names: NameSectionReader<'data>, environ: &mut dyn ModuleEnvironment<'data>, ) -> WasmResult<()> { - while let Ok(subsection) = names.read() { - match subsection { - wasmparser::Name::Function(function_subsection) => { - if let Some(function_names) = function_subsection - .get_map() - .ok() - .and_then(parse_function_name_subsection) - { - for (index, name) in function_names { - environ.declare_func_name(index, name)?; + for subsection in names { + match subsection? { + wasmparser::Name::Function(f) => { + let mut names = f.get_map()?; + for _ in 0..names.get_count() { + let Naming { index, name } = names.read()?; + // We reserve `u32::MAX` for our own use in cranelift-entity. + if index != u32::max_value() { + environ.declare_func_name(FuncIndex::from_u32(index), name); } } } wasmparser::Name::Module(module) => { - if let Ok(name) = module.get_name() { - environ.declare_module_name(name)?; + let name = module.get_name()?; + environ.declare_module_name(name); + } + wasmparser::Name::Local(l) => { + let mut reader = l.get_function_local_reader()?; + for _ in 0..reader.get_count() { + let f = reader.read()?; + if f.func_index == u32::max_value() { + continue; + } + let mut map = f.get_map()?; + for _ in 0..map.get_count() { + let Naming { index, name } = map.read()?; + environ.declare_local_name(FuncIndex::from_u32(f.func_index), index, name) + } } } - wasmparser::Name::Local(_) => {} - }; + } } Ok(()) } - -fn parse_function_name_subsection( - mut naming_reader: NamingReader<'_>, -) -> Option> { - let mut function_names = HashMap::new(); - for _ in 0..naming_reader.get_count() { - let Naming { index, name } = naming_reader.read().ok()?; - if index == std::u32::MAX { - // We reserve `u32::MAX` for our own use in cranelift-entity. 
- return None; - } - - if function_names - .insert(FuncIndex::from_u32(index), name) - .is_some() - { - // If the function index has been previously seen, then we - // break out of the loop and early return `None`, because these - // should be unique. - return None; - } - } - Some(function_names) -} diff --git a/crates/debug/src/lib.rs b/crates/debug/src/lib.rs index 8b0a2b0bdb..259c869756 100644 --- a/crates/debug/src/lib.rs +++ b/crates/debug/src/lib.rs @@ -6,11 +6,9 @@ use anyhow::{bail, ensure, Error}; use object::{RelocationEncoding, RelocationKind}; use std::collections::HashMap; -pub use crate::read_debuginfo::{read_debuginfo, DebugInfoData, WasmFileInfo}; pub use crate::write_debuginfo::{emit_dwarf, DwarfSection, DwarfSectionRelocTarget}; mod gc; -mod read_debuginfo; mod transform; mod write_debuginfo; diff --git a/crates/debug/src/read_debuginfo.rs b/crates/debug/src/read_debuginfo.rs deleted file mode 100644 index ad28d9b1be..0000000000 --- a/crates/debug/src/read_debuginfo.rs +++ /dev/null @@ -1,244 +0,0 @@ -use anyhow::{bail, Result}; -use gimli::{ - DebugAbbrev, DebugAddr, DebugInfo, DebugLine, DebugLineStr, DebugLoc, DebugLocLists, - DebugRanges, DebugRngLists, DebugStr, DebugStrOffsets, DebugTypes, EndianSlice, LittleEndian, - LocationLists, RangeLists, -}; -use std::collections::HashMap; -use std::path::PathBuf; -use wasmparser::{self, NameSectionReader, Parser, Payload, TypeDef}; - -trait Reader: gimli::Reader {} - -impl<'input> Reader for gimli::EndianSlice<'input, LittleEndian> {} - -pub use wasmparser::Type as WasmType; - -pub type Dwarf<'input> = gimli::Dwarf>; - -#[derive(Debug)] -pub struct FunctionMetadata { - pub params: Box<[WasmType]>, - pub locals: Box<[(u32, WasmType)]>, -} - -#[derive(Debug)] -pub struct WasmFileInfo { - pub path: Option, - pub code_section_offset: u64, - pub imported_func_count: u32, - pub funcs: Box<[FunctionMetadata]>, -} - -#[derive(Debug)] -pub struct NameSection { - pub module_name: Option, - pub func_names: 
HashMap, - pub locals_names: HashMap>, -} - -#[derive(Debug)] -pub struct DebugInfoData<'a> { - pub dwarf: Dwarf<'a>, - pub name_section: Option, - pub wasm_file: WasmFileInfo, -} - -fn convert_sections<'a>(sections: HashMap<&str, &'a [u8]>) -> Result> { - const EMPTY_SECTION: &[u8] = &[]; - - let endian = LittleEndian; - let debug_str = DebugStr::new(sections.get(".debug_str").unwrap_or(&EMPTY_SECTION), endian); - let debug_abbrev = DebugAbbrev::new( - sections.get(".debug_abbrev").unwrap_or(&EMPTY_SECTION), - endian, - ); - let debug_info = DebugInfo::new( - sections.get(".debug_info").unwrap_or(&EMPTY_SECTION), - endian, - ); - let debug_line = DebugLine::new( - sections.get(".debug_line").unwrap_or(&EMPTY_SECTION), - endian, - ); - let debug_addr = DebugAddr::from(EndianSlice::new( - sections.get(".debug_addr").unwrap_or(&EMPTY_SECTION), - endian, - )); - - let debug_line_str = DebugLineStr::from(EndianSlice::new( - sections.get(".debug_line_str").unwrap_or(&EMPTY_SECTION), - endian, - )); - let debug_str_sup = DebugStr::from(EndianSlice::new(EMPTY_SECTION, endian)); - - let debug_ranges = match sections.get(".debug_ranges") { - Some(section) => DebugRanges::new(section, endian), - None => DebugRanges::new(EMPTY_SECTION, endian), - }; - let debug_rnglists = match sections.get(".debug_rnglists") { - Some(section) => DebugRngLists::new(section, endian), - None => DebugRngLists::new(EMPTY_SECTION, endian), - }; - let ranges = RangeLists::new(debug_ranges, debug_rnglists); - - let debug_loc = match sections.get(".debug_loc") { - Some(section) => DebugLoc::new(section, endian), - None => DebugLoc::new(EMPTY_SECTION, endian), - }; - let debug_loclists = match sections.get(".debug_loclists") { - Some(section) => DebugLocLists::new(section, endian), - None => DebugLocLists::new(EMPTY_SECTION, endian), - }; - let locations = LocationLists::new(debug_loc, debug_loclists); - - let debug_str_offsets = DebugStrOffsets::from(EndianSlice::new( - 
sections.get(".debug_str_offsets").unwrap_or(&EMPTY_SECTION), - endian, - )); - - if sections.contains_key(".debug_types") { - bail!("Unexpected .debug_types"); - } - - let debug_types = DebugTypes::from(EndianSlice::new(EMPTY_SECTION, endian)); - - Ok(Dwarf { - debug_abbrev, - debug_addr, - debug_info, - debug_line, - debug_line_str, - debug_str, - debug_str_offsets, - debug_str_sup, - debug_types, - locations, - ranges, - }) -} - -fn read_name_section(reader: wasmparser::NameSectionReader) -> wasmparser::Result { - let mut module_name = None; - let mut func_names = HashMap::new(); - let mut locals_names = HashMap::new(); - for i in reader.into_iter() { - match i? { - wasmparser::Name::Module(m) => { - module_name = Some(String::from(m.get_name()?)); - } - wasmparser::Name::Function(f) => { - let mut reader = f.get_map()?; - while let Ok(naming) = reader.read() { - func_names.insert(naming.index, String::from(naming.name)); - } - } - wasmparser::Name::Local(l) => { - let mut reader = l.get_function_local_reader()?; - while let Ok(f) = reader.read() { - let mut names = HashMap::new(); - let mut reader = f.get_map()?; - while let Ok(naming) = reader.read() { - names.insert(naming.index, String::from(naming.name)); - } - locals_names.insert(f.func_index, names); - } - } - } - } - let result = NameSection { - module_name, - func_names, - locals_names, - }; - Ok(result) -} - -pub fn read_debuginfo(data: &[u8]) -> Result { - let mut sections = HashMap::new(); - let mut name_section = None; - let mut code_section_offset = 0; - let mut imported_func_count = 0; - - let mut signatures_params: Vec> = Vec::new(); - let mut func_params_refs: Vec = Vec::new(); - let mut func_locals: Vec> = Vec::new(); - - for payload in Parser::new(0).parse_all(data) { - match payload? 
{ - Payload::CustomSection { - name, - data, - data_offset, - } => { - if name.starts_with(".debug_") { - sections.insert(name, data); - } else if name == "name" { - if let Ok(reader) = NameSectionReader::new(data, data_offset) { - if let Ok(section) = read_name_section(reader) { - name_section = Some(section); - } - } - } - } - Payload::TypeSection(s) => { - signatures_params = s - .into_iter() - .map(|ft| { - if let Ok(TypeDef::Func(ft)) = ft { - Ok(ft.params) - } else { - unimplemented!("module linking not implemented yet") - } - }) - .collect::>>()?; - } - Payload::ImportSection(s) => { - for i in s { - if let wasmparser::ImportSectionEntryType::Function(_) = i?.ty { - imported_func_count += 1; - } - } - } - Payload::FunctionSection(s) => { - func_params_refs = s - .into_iter() - .map(|index| Ok(index? as usize)) - .collect::>>()?; - } - Payload::CodeSectionStart { range, .. } => { - code_section_offset = range.start as u64; - } - Payload::CodeSectionEntry(body) => { - let locals = body.get_locals_reader()?; - let locals = locals - .into_iter() - .collect::, _>>()? 
- .into_boxed_slice(); - func_locals.push(locals); - } - _ => (), - } - } - - let func_meta = func_params_refs - .into_iter() - .zip(func_locals.into_iter()) - .map(|(params_index, locals)| FunctionMetadata { - params: signatures_params[params_index].clone(), - locals, - }) - .collect::>(); - - let dwarf = convert_sections(sections)?; - Ok(DebugInfoData { - dwarf, - name_section, - wasm_file: WasmFileInfo { - path: None, - code_section_offset, - imported_func_count, - funcs: func_meta.into_boxed_slice(), - }, - }) -} diff --git a/crates/debug/src/transform/address_transform.rs b/crates/debug/src/transform/address_transform.rs index 71fa050172..db558309ce 100644 --- a/crates/debug/src/transform/address_transform.rs +++ b/crates/debug/src/transform/address_transform.rs @@ -1,4 +1,3 @@ -use crate::WasmFileInfo; use gimli::write; use more_asserts::assert_le; use std::collections::BTreeMap; @@ -6,6 +5,7 @@ use std::iter::FromIterator; use wasmtime_environ::entity::{EntityRef, PrimaryMap}; use wasmtime_environ::ir::SourceLoc; use wasmtime_environ::wasm::DefinedFuncIndex; +use wasmtime_environ::WasmFileInfo; use wasmtime_environ::{FunctionAddressMap, ModuleAddressMap}; pub type GeneratedAddress = usize; @@ -602,11 +602,11 @@ impl AddressTransform { #[cfg(test)] mod tests { use super::{build_function_lookup, get_wasm_code_offset, AddressTransform}; - use crate::read_debuginfo::WasmFileInfo; use gimli::write::Address; use std::iter::FromIterator; use wasmtime_environ::entity::PrimaryMap; use wasmtime_environ::ir::SourceLoc; + use wasmtime_environ::WasmFileInfo; use wasmtime_environ::{FunctionAddressMap, InstructionAddressMap, ModuleAddressMap}; #[test] @@ -724,7 +724,7 @@ mod tests { path: None, code_section_offset: 1, imported_func_count: 0, - funcs: Box::new([]), + funcs: Vec::new(), }, ); diff --git a/crates/debug/src/transform/expression.rs b/crates/debug/src/transform/expression.rs index 8bd3a76302..c92280f69b 100644 --- a/crates/debug/src/transform/expression.rs +++ 
b/crates/debug/src/transform/expression.rs @@ -683,9 +683,9 @@ mod tests { } fn create_mock_address_transform() -> AddressTransform { - use crate::read_debuginfo::WasmFileInfo; use wasmtime_environ::entity::PrimaryMap; use wasmtime_environ::ir::SourceLoc; + use wasmtime_environ::WasmFileInfo; use wasmtime_environ::{FunctionAddressMap, InstructionAddressMap}; let mut module_map = PrimaryMap::new(); let code_section_offset: u32 = 100; @@ -709,7 +709,7 @@ mod tests { }); let fi = WasmFileInfo { code_section_offset: code_section_offset.into(), - funcs: Box::new([]), + funcs: Vec::new(), imported_func_count: 0, path: None, }; diff --git a/crates/debug/src/transform/mod.rs b/crates/debug/src/transform/mod.rs index 8d6c009e30..c2dbead619 100644 --- a/crates/debug/src/transform/mod.rs +++ b/crates/debug/src/transform/mod.rs @@ -2,7 +2,6 @@ use self::refs::DebugInfoRefsMap; use self::simulate::generate_simulated_dwarf; use self::unit::clone_unit; use crate::gc::build_dependencies; -use crate::DebugInfoData; use anyhow::Error; use gimli::{ write, DebugAddr, DebugLine, DebugLineStr, DebugStr, DebugStrOffsets, LocationLists, @@ -11,6 +10,7 @@ use gimli::{ use std::collections::HashSet; use thiserror::Error; use wasmtime_environ::isa::TargetIsa; +use wasmtime_environ::DebugInfoData; use wasmtime_environ::{ModuleAddressMap, ModuleVmctxInfo, ValueLabelsRanges}; pub use address_transform::AddressTransform; diff --git a/crates/debug/src/transform/simulate.rs b/crates/debug/src/transform/simulate.rs index 4f06ba8aa3..a4a1b2f9e2 100644 --- a/crates/debug/src/transform/simulate.rs +++ b/crates/debug/src/transform/simulate.rs @@ -1,18 +1,19 @@ use super::expression::{CompiledExpression, FunctionFrameInfo}; use super::utils::{add_internal_types, append_vmctx_info, get_function_frame_info}; use super::AddressTransform; -use crate::read_debuginfo::WasmFileInfo; use anyhow::{Context, Error}; use gimli::write; use gimli::{self, LineEncoding}; use std::collections::{HashMap, HashSet}; use 
std::path::PathBuf; +use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; +use wasmparser::Type as WasmType; use wasmtime_environ::entity::EntityRef; -use wasmtime_environ::wasm::{get_vmctx_value_label, DefinedFuncIndex}; -use wasmtime_environ::{ModuleVmctxInfo, ValueLabelsRanges}; - -pub use crate::read_debuginfo::{DebugInfoData, FunctionMetadata, WasmType}; use wasmtime_environ::isa::TargetIsa; +use wasmtime_environ::wasm::{get_vmctx_value_label, DefinedFuncIndex}; +use wasmtime_environ::WasmFileInfo; +use wasmtime_environ::{DebugInfoData, FunctionMetadata}; +use wasmtime_environ::{ModuleVmctxInfo, ValueLabelsRanges}; const PRODUCER_NAME: &str = "wasmtime"; @@ -87,7 +88,7 @@ fn generate_line_info( Ok(out_program) } -fn check_invalid_chars_in_name(s: String) -> Option { +fn check_invalid_chars_in_name(s: &str) -> Option<&str> { if s.contains('\x00') { None } else { @@ -96,16 +97,13 @@ fn check_invalid_chars_in_name(s: String) -> Option { } fn autogenerate_dwarf_wasm_path(di: &DebugInfoData) -> PathBuf { + static NEXT_ID: AtomicUsize = AtomicUsize::new(0); let module_name = di .name_section - .as_ref() - .and_then(|ns| ns.module_name.to_owned()) + .module_name .and_then(check_invalid_chars_in_name) - .unwrap_or_else(|| unsafe { - static mut GEN_ID: u32 = 0; - GEN_ID += 1; - format!("", GEN_ID) - }); + .map(|s| s.to_string()) + .unwrap_or_else(|| format!("", NEXT_ID.fetch_add(1, SeqCst))); let path = format!("//{}.wasm", module_name); PathBuf::from(path) } @@ -195,7 +193,7 @@ fn generate_vars( scope_ranges: &[(u64, u64)], wasm_types: &WasmTypesDieRefs, func_meta: &FunctionMetadata, - locals_names: Option<&HashMap>, + locals_names: Option<&HashMap>, out_strings: &mut write::StringTable, isa: &dyn TargetIsa, ) -> Result<(), Error> { @@ -253,7 +251,7 @@ fn generate_vars( let name_id = match locals_names .and_then(|m| m.get(&(var_index as u32))) - .and_then(|s| check_invalid_chars_in_name(s.to_owned())) + .and_then(|s| check_invalid_chars_in_name(s)) { Some(n) => 
out_strings.add(assert_dwarf_str!(n)), None => out_strings.add(format!("var{}", var_index)), @@ -297,14 +295,8 @@ pub fn generate_simulated_dwarf( .and_then(check_invalid_chars_in_path) .unwrap_or_else(|| autogenerate_dwarf_wasm_path(di)); - let (func_names, locals_names) = if let Some(ref name_section) = di.name_section { - ( - Some(&name_section.func_names), - Some(&name_section.locals_names), - ) - } else { - (None, None) - }; + let func_names = &di.name_section.func_names; + let locals_names = &di.name_section.locals_names; let imported_func_count = di.wasm_file.imported_func_count; let (unit, root_id, name_id) = { @@ -376,8 +368,8 @@ pub fn generate_simulated_dwarf( let func_index = imported_func_count + (index as u32); let id = match func_names - .and_then(|m| m.get(&func_index)) - .and_then(|s| check_invalid_chars_in_name(s.to_owned())) + .get(&func_index) + .and_then(|s| check_invalid_chars_in_name(s)) { Some(n) => out_strings.add(assert_dwarf_str!(n)), None => out_strings.add(format!("wasm-function[{}]", func_index)), @@ -407,7 +399,7 @@ pub fn generate_simulated_dwarf( &[(source_range.0, source_range.1)], &wasm_types, &di.wasm_file.funcs[index], - locals_names.and_then(|m| m.get(&(index as u32))), + locals_names.get(&(index as u32)), out_strings, isa, )?; diff --git a/crates/debug/src/write_debuginfo.rs b/crates/debug/src/write_debuginfo.rs index 68b9e9a004..3292be373a 100644 --- a/crates/debug/src/write_debuginfo.rs +++ b/crates/debug/src/write_debuginfo.rs @@ -1,10 +1,10 @@ -pub use crate::read_debuginfo::{read_debuginfo, DebugInfoData, WasmFileInfo}; pub use crate::transform::transform_dwarf; use gimli::write::{Address, Dwarf, EndianVec, FrameTable, Result, Sections, Writer}; use gimli::{RunTimeEndian, SectionId}; use wasmtime_environ::entity::{EntityRef, PrimaryMap}; use wasmtime_environ::isa::{unwind::UnwindInfo, TargetIsa}; use wasmtime_environ::wasm::DefinedFuncIndex; +use wasmtime_environ::DebugInfoData; use wasmtime_environ::{ModuleAddressMap, 
ModuleVmctxInfo, ValueLabelsRanges}; #[derive(Clone)] diff --git a/crates/environ/Cargo.toml b/crates/environ/Cargo.toml index ea1b17c16e..3be89c91f5 100644 --- a/crates/environ/Cargo.toml +++ b/crates/environ/Cargo.toml @@ -33,6 +33,7 @@ toml = "0.5.5" file-per-thread-logger = "0.1.1" more-asserts = "0.2.1" cfg-if = "0.1.9" +gimli = "0.21" [target.'cfg(target_os = "windows")'.dependencies] winapi = "0.3.7" diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs index 276dd539c2..62c3dae93b 100644 --- a/crates/environ/src/lib.rs +++ b/crates/environ/src/lib.rs @@ -57,10 +57,7 @@ pub use crate::lightbeam::Lightbeam; pub use crate::module::{ EntityIndex, MemoryPlan, MemoryStyle, Module, ModuleLocal, TableElements, TablePlan, TableStyle, }; -pub use crate::module_environ::{ - translate_signature, DataInitializer, DataInitializerLocation, FunctionBodyData, - ModuleEnvironment, ModuleTranslation, -}; +pub use crate::module_environ::*; pub use crate::tunables::Tunables; pub use crate::vmoffsets::{TargetSharedSignatureIndex, VMOffsets, INTERRUPTED}; diff --git a/crates/environ/src/module_environ.rs b/crates/environ/src/module_environ.rs index 9499250a12..65fa3d3bd4 100644 --- a/crates/environ/src/module_environ.rs +++ b/crates/environ/src/module_environ.rs @@ -10,17 +10,17 @@ use cranelift_wasm::{ TargetEnvironment, WasmError, WasmFuncType, WasmResult, }; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use std::convert::TryFrom; +use std::path::PathBuf; use std::sync::Arc; +use wasmparser::Type as WasmType; -/// Contains function data: byte code and its offset in the module. -#[derive(Hash)] -pub struct FunctionBodyData<'a> { - /// Body byte code. - pub data: &'a [u8], - - /// Body offset in the module file. - pub module_offset: usize, +/// Object containing the standalone environment information. +pub struct ModuleEnvironment<'data> { + /// The result to be filled in. 
+ result: ModuleTranslation<'data>, + code_index: u32, } /// The result of translating via `ModuleEnvironment`. Function bodies are not @@ -44,12 +44,60 @@ pub struct ModuleTranslation<'data> { /// The decoded Wasm types for the module. pub module_translation: Option, + + /// DWARF debug information, if enabled, parsed from the module. + pub debuginfo: Option>, } -/// Object containing the standalone environment information. -pub struct ModuleEnvironment<'data> { - /// The result to be filled in. - result: ModuleTranslation<'data>, +/// Contains function data: byte code and its offset in the module. +#[derive(Hash)] +pub struct FunctionBodyData<'a> { + /// Body byte code. + pub data: &'a [u8], + + /// Body offset in the module file. + pub module_offset: usize, +} + +#[derive(Debug, Default)] +#[allow(missing_docs)] +pub struct DebugInfoData<'a> { + pub dwarf: Dwarf<'a>, + pub name_section: NameSection<'a>, + pub wasm_file: WasmFileInfo, + debug_loc: gimli::DebugLoc>, + debug_loclists: gimli::DebugLocLists>, + debug_ranges: gimli::DebugRanges>, + debug_rnglists: gimli::DebugRngLists>, +} + +#[allow(missing_docs)] +pub type Dwarf<'input> = gimli::Dwarf>; + +type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>; + +#[derive(Debug, Default)] +#[allow(missing_docs)] +pub struct NameSection<'a> { + pub module_name: Option<&'a str>, + pub func_names: HashMap, + pub locals_names: HashMap>, +} + +#[derive(Debug, Default)] +#[allow(missing_docs)] +pub struct WasmFileInfo { + pub path: Option, + pub code_section_offset: u64, + pub imported_func_count: u32, + pub funcs: Vec, +} + +#[derive(Debug)] +#[allow(missing_docs)] +pub struct FunctionMetadata { + pub params: Box<[WasmType]>, + pub locals: Box<[(u32, WasmType)]>, } impl<'data> ModuleEnvironment<'data> { @@ -63,7 +111,13 @@ impl<'data> ModuleEnvironment<'data> { data_initializers: Vec::new(), tunables: tunables.clone(), module_translation: None, + debuginfo: if tunables.debug_info { + 
Some(DebugInfoData::default()) + } else { + None + }, }, + code_index: 0, } } @@ -87,6 +141,42 @@ impl<'data> ModuleEnvironment<'data> { .insert(String::from(name), export); Ok(()) } + + fn register_dwarf_section(&mut self, name: &str, data: &'data [u8]) { + let info = match &mut self.result.debuginfo { + Some(info) => info, + None => return, + }; + if !name.starts_with(".debug_") { + return; + } + let dwarf = &mut info.dwarf; + let endian = gimli::LittleEndian; + let slice = gimli::EndianSlice::new(data, endian); + + match name { + ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian), + ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian), + ".debug_info" => dwarf.debug_info = gimli::DebugInfo::new(data, endian), + ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian), + ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice), + ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice), + ".debug_str_sup" => dwarf.debug_str_sup = gimli::DebugStr::from(slice), + ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian), + ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian), + ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice), + ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice), + ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice), + ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice), + other => { + log::warn!("unknown debug section `{}`", other); + return; + } + } + + dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists); + dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists); + } } impl<'data> TargetEnvironment for ModuleEnvironment<'data> { @@ -144,6 +234,9 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data EntityIndex::Function(func_index), )); 
self.result.module.local.num_imported_funcs += 1; + if let Some(info) = &mut self.result.debuginfo { + info.wasm_file.imported_func_count += 1; + } Ok(()) } @@ -346,6 +439,12 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data Ok(()) } + fn reserve_function_bodies(&mut self, _count: u32, offset: u64) { + if let Some(info) = &mut self.result.debuginfo { + info.wasm_file.code_section_offset = offset; + } + } + fn define_function_body( &mut self, _module_translation: &ModuleTranslationState, @@ -356,6 +455,22 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data data: body_bytes, module_offset: body_offset, }); + if let Some(info) = &mut self.result.debuginfo { + let func_index = self.code_index + self.result.module.local.num_imported_funcs as u32; + let func_index = FuncIndex::from_u32(func_index); + let sig_index = self.result.module.local.functions[func_index]; + let sig = &self.result.module.local.signatures[sig_index]; + let mut locals = Vec::new(); + let body = wasmparser::FunctionBody::new(body_offset, body_bytes); + for pair in body.get_locals_reader()? 
{ + locals.push(pair?); + } + info.wasm_file.funcs.push(FunctionMetadata { + locals: locals.into_boxed_slice(), + params: sig.0.params.iter().cloned().map(|i| i.into()).collect(), + }); + } + self.code_index += 1; Ok(()) } @@ -402,20 +517,38 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data Ok(()) } - fn declare_module_name(&mut self, name: &'data str) -> WasmResult<()> { + fn declare_module_name(&mut self, name: &'data str) { self.result.module.name = Some(name.to_string()); - Ok(()) + if let Some(info) = &mut self.result.debuginfo { + info.name_section.module_name = Some(name); + } } - fn declare_func_name(&mut self, func_index: FuncIndex, name: &'data str) -> WasmResult<()> { + fn declare_func_name(&mut self, func_index: FuncIndex, name: &'data str) { self.result .module .func_names .insert(func_index, name.to_string()); - Ok(()) + if let Some(info) = &mut self.result.debuginfo { + info.name_section + .func_names + .insert(func_index.as_u32(), name); + } } - fn custom_section(&mut self, name: &'data str, _data: &'data [u8]) -> WasmResult<()> { + fn declare_local_name(&mut self, func_index: FuncIndex, local: u32, name: &'data str) { + if let Some(info) = &mut self.result.debuginfo { + info.name_section + .locals_names + .entry(func_index.as_u32()) + .or_insert(HashMap::new()) + .insert(local, name); + } + } + + fn custom_section(&mut self, name: &'data str, data: &'data [u8]) -> WasmResult<()> { + self.register_dwarf_section(name, data); + match name { "webidl-bindings" | "wasm-interface-types" => Err(WasmError::Unsupported( "\ @@ -431,6 +564,7 @@ and for re-adding support for interface types you can see this issue: " .to_owned(), )), + // skip other sections _ => Ok(()), } diff --git a/crates/jit/src/compiler.rs b/crates/jit/src/compiler.rs index 2938e5766c..fa3bbe956b 100644 --- a/crates/jit/src/compiler.rs +++ b/crates/jit/src/compiler.rs @@ -5,12 +5,12 @@ use crate::object::{build_object, ObjectUnwindInfo}; use 
cranelift_codegen::ir; use object::write::Object; use std::hash::{Hash, Hasher}; -use wasmtime_debug::{emit_dwarf, DebugInfoData, DwarfSection}; +use wasmtime_debug::{emit_dwarf, DwarfSection}; use wasmtime_environ::entity::{EntityRef, PrimaryMap}; use wasmtime_environ::isa::{unwind::UnwindInfo, TargetFrontendConfig, TargetIsa}; use wasmtime_environ::wasm::{DefinedFuncIndex, DefinedMemoryIndex, MemoryIndex}; use wasmtime_environ::{ - Compiler as _C, Module, ModuleAddressMap, ModuleMemoryOffset, ModuleTranslation, + Compiler as _C, DebugInfoData, Module, ModuleAddressMap, ModuleMemoryOffset, ModuleTranslation, ModuleVmctxInfo, StackMaps, Traps, Tunables, VMOffsets, ValueLabelsRanges, }; @@ -61,7 +61,7 @@ fn _assert_compiler_send_sync() { fn transform_dwarf_data( isa: &dyn TargetIsa, module: &Module, - debug_data: DebugInfoData, + debug_data: &DebugInfoData, address_transform: &ModuleAddressMap, value_ranges: &ValueLabelsRanges, stack_slots: PrimaryMap, @@ -86,7 +86,7 @@ fn transform_dwarf_data( }; emit_dwarf( isa, - &debug_data, + debug_data, &address_transform, &module_vmctx_info, &value_ranges, @@ -129,7 +129,6 @@ impl Compiler { pub(crate) fn compile<'data>( &self, translation: &ModuleTranslation, - debug_data: Option, ) -> Result { let ( compilation, @@ -152,12 +151,12 @@ impl Compiler { } .map_err(SetupError::Compile)?; - let dwarf_sections = if debug_data.is_some() && !compilation.is_empty() { + let dwarf_sections = if translation.debuginfo.is_some() && !compilation.is_empty() { let unwind_info = compilation.unwind_info(); transform_dwarf_data( &*self.isa, &translation.module, - debug_data.unwrap(), + translation.debuginfo.as_ref().unwrap(), &address_transform, &value_ranges, stack_slots, diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index 0e50a927d6..65931b364a 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -15,7 +15,7 @@ use std::any::Any; use std::collections::HashMap; use std::sync::Arc; use 
thiserror::Error; -use wasmtime_debug::{create_gdbjit_image, read_debuginfo}; +use wasmtime_debug::create_gdbjit_image; use wasmtime_environ::entity::{BoxedSlice, PrimaryMap}; use wasmtime_environ::isa::TargetIsa; use wasmtime_environ::wasm::{DefinedFuncIndex, SignatureIndex}; @@ -89,21 +89,13 @@ impl CompilationArtifacts { .translate(data) .map_err(|error| SetupError::Compile(CompileError::Wasm(error)))?; - let debug_info = compiler.tunables().debug_info; - - let mut debug_data = None; - if debug_info { - // TODO Do we want to ignore invalid DWARF data? - debug_data = Some(read_debuginfo(&data)?); - } - let Compilation { obj, unwind_info, traps, stack_maps, address_transform, - } = compiler.compile(&translation, debug_data)?; + } = compiler.compile(&translation)?; let ModuleTranslation { module, @@ -131,7 +123,7 @@ impl CompilationArtifacts { traps, stack_maps, address_transform, - debug_info, + debug_info: compiler.tunables().debug_info, }) } } diff --git a/src/obj.rs b/src/obj.rs index b61a61650b..cce633e928 100644 --- a/src/obj.rs +++ b/src/obj.rs @@ -2,7 +2,7 @@ use anyhow::{anyhow, bail, Context as _, Result}; use object::write::Object; use target_lexicon::Triple; use wasmtime::Strategy; -use wasmtime_debug::{emit_dwarf, read_debuginfo}; +use wasmtime_debug::emit_dwarf; #[cfg(feature = "lightbeam")] use wasmtime_environ::Lightbeam; use wasmtime_environ::{ @@ -103,8 +103,7 @@ pub fn compile_to_obj( } }; - let dwarf_sections = if debug_info { - let debug_data = read_debuginfo(wasm).context("failed to emit DWARF")?; + let dwarf_sections = if let Some(debug_data) = &translation.debuginfo { emit_dwarf( &*isa, &debug_data,