diff --git a/crates/cranelift/src/debug/transform/simulate.rs b/crates/cranelift/src/debug/transform/simulate.rs index b27b78a01b..de023a8dbf 100644 --- a/crates/cranelift/src/debug/transform/simulate.rs +++ b/crates/cranelift/src/debug/transform/simulate.rs @@ -13,7 +13,7 @@ use std::path::PathBuf; use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; use wasmparser::Type as WasmType; use wasmtime_environ::{ - DebugInfoData, DefinedFuncIndex, EntityRef, FunctionMetadata, WasmFileInfo, + DebugInfoData, DefinedFuncIndex, EntityRef, FuncIndex, FunctionMetadata, WasmFileInfo, }; const PRODUCER_NAME: &str = "wasmtime"; @@ -369,7 +369,7 @@ pub fn generate_simulated_dwarf( let func_index = imported_func_count + (index as u32); let id = match func_names - .get(&func_index) + .get(&FuncIndex::from_u32(func_index)) .and_then(|s| check_invalid_chars_in_name(s)) { Some(n) => out_strings.add(assert_dwarf_str!(n)), @@ -400,7 +400,7 @@ pub fn generate_simulated_dwarf( &[(source_range.0, source_range.1)], &wasm_types, &di.wasm_file.funcs[index], - locals_names.get(&(index as u32)), + locals_names.get(&FuncIndex::from_u32(index as u32)), out_strings, isa, )?; diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index abf03753c3..fded370228 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -664,9 +664,6 @@ pub struct Module { /// The map from passive data index (data segment index space) to index in `passive_data`. pub passive_data_map: BTreeMap>, - /// WebAssembly function names. - pub func_names: BTreeMap, - /// Types declared in the wasm module. pub types: PrimaryMap, diff --git a/crates/environ/src/module_environ.rs b/crates/environ/src/module_environ.rs index 76e6c55b52..b8f50febe2 100644 --- a/crates/environ/src/module_environ.rs +++ b/crates/environ/src/module_environ.rs @@ -146,8 +146,8 @@ type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>; #[allow(missing_docs)] pub struct NameSection<'a> { pub module_name: Option<&'a str>, - pub func_names: HashMap, - pub locals_names: HashMap>, + pub func_names: HashMap, + pub locals_names: HashMap>, } #[derive(Debug, Default)] @@ -1291,18 +1291,17 @@ and for re-adding support for interface types you can see this issue: if (index as usize) >= self.result.module.functions.len() { continue; } + + // Store the name unconditionally, regardless of + // whether we're parsing debuginfo, since function + // names are almost always present in the + // final compilation artifact. let index = FuncIndex::from_u32(index); self.result - .module + .debuginfo + .name_section .func_names - .insert(index, name.to_string()); - if self.tunables.generate_native_debuginfo { - self.result - .debuginfo - .name_section - .func_names - .insert(index.as_u32(), name); - } + .insert(index, name); } } wasmparser::Name::Module(module) => { @@ -1332,7 +1331,7 @@ and for re-adding support for interface types you can see this issue: .debuginfo .name_section .locals_names - .entry(f.indirect_index) + .entry(FuncIndex::from_u32(f.indirect_index)) .or_insert(HashMap::new()) .insert(index, name); } diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index 1256198aa2..8339b21ec5 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -6,16 +6,18 @@ use crate::code_memory::CodeMemory; use crate::debug::create_gdbjit_image; use crate::{MmapVec, ProfilingAgent}; -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use object::write::{Object, StandardSegment}; use object::{File, Object as _, ObjectSection, SectionKind}; use serde::{Deserialize, Serialize}; +use std::convert::TryFrom; use std::ops::Range; +use std::str; use std::sync::Arc; use thiserror::Error; use wasmtime_environ::{ - CompileError, DefinedFuncIndex, FunctionInfo, InstanceSignature, InstanceTypeIndex, Module, - ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap, SignatureIndex, + CompileError, DefinedFuncIndex, FuncIndex, FunctionInfo, InstanceSignature, InstanceTypeIndex, + Module, ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap, SignatureIndex, StackMapInformation, Trampoline, Tunables, WasmFuncType, ELF_WASMTIME_ADDRMAP, ELF_WASMTIME_TRAPS, }; @@ -48,6 +50,23 @@ const ELF_WASM_DATA: &'static str = ".rodata.wasm"; /// decoded to get all the relevant information. const ELF_WASMTIME_INFO: &'static str = ".wasmtime.info"; +/// This is the name of the section in the final ELF image which contains a +/// concatenated list of all function names. +/// +/// This section is optionally included in the final artifact depending on +/// whether the wasm module has any name data at all (or in the future if we add +/// an option to not preserve name data). This section is a concatenated list of +/// strings where `CompiledModuleInfo::func_names` stores offsets/lengths into +/// this section. +/// +/// Note that the goal of this section is to avoid having to decode names at +/// module-load time if we can. Names are typically only used for debugging or +/// things like backtraces so there's no need to eagerly load all of them. By +/// storing the data in a separate section the hope is that the data, which is +/// sometimes quite large (3MB seen for spidermonkey-compiled-to-wasm), can be +/// paged in lazily from an mmap and is never paged in if we never reference it. +const ELF_NAME_DATA: &'static str = ".name.wasm"; + /// An error condition while setting up a wasm instance, be it validation, /// compilation, or instantiation. #[derive(Error, Debug)] @@ -82,6 +101,9 @@ pub struct CompiledModuleInfo { /// Metadata about each compiled function. funcs: PrimaryMap, + /// Sorted list, by function index, of names we have for this module. + func_names: Vec, + /// The trampolines compiled into the text section and their start/length /// relative to the start of the text section. trampolines: Vec, @@ -90,6 +112,13 @@ pub struct CompiledModuleInfo { meta: Metadata, } +#[derive(Serialize, Deserialize)] +struct FunctionName { + idx: FuncIndex, + offset: u32, + len: u32, +} + #[derive(Serialize, Deserialize)] struct Metadata { /// Whether or not native debug information is available in `obj` @@ -158,6 +187,32 @@ pub fn finish_compile( obj.append_section_data(data_id, data, 1); } + // If any names are present in the module then the `ELF_NAME_DATA` section + // is create and appended. + let mut func_names = Vec::new(); + if debuginfo.name_section.func_names.len() > 0 { + let name_id = obj.add_section( + obj.segment_name(StandardSegment::Data).to_vec(), + ELF_NAME_DATA.as_bytes().to_vec(), + SectionKind::ReadOnlyData, + ); + let mut sorted_names = debuginfo.name_section.func_names.iter().collect::>(); + sorted_names.sort_by_key(|(idx, _name)| *idx); + for (idx, name) in sorted_names { + let offset = obj.append_section_data(name_id, name.as_bytes(), 1); + let offset = match u32::try_from(offset) { + Ok(offset) => offset, + Err(_) => bail!("name section too large (> 4gb)"), + }; + let len = u32::try_from(name.len()).unwrap(); + func_names.push(FunctionName { + idx: *idx, + offset, + len, + }); + } + } + // Update passive data offsets since they're all located after the other // data in the module. for (_, range) in module.passive_data_map.iter_mut() { @@ -200,6 +255,7 @@ pub fn finish_compile( module, funcs, trampolines, + func_names, meta: Metadata { native_debug_info_present: tunables.generate_native_debuginfo, has_unparsed_debuginfo, @@ -253,6 +309,8 @@ pub struct CompiledModule { dbg_jit_registration: Option, /// A unique ID used to register this module with the engine. unique_id: CompiledModuleId, + func_names: Vec, + func_name_data: Range, } impl CompiledModule { @@ -302,6 +360,15 @@ impl CompiledModule { .context("failed to deserialize wasmtime module info")?, }; + let func_name_data = match code + .obj + .section_by_name(ELF_NAME_DATA) + .and_then(|s| s.data().ok()) + { + Some(data) => subslice_range(data, code.mmap), + None => 0..0, + }; + let mut ret = Self { module: Arc::new(info.module), funcs: info.funcs, @@ -319,6 +386,8 @@ impl CompiledModule { code_memory, meta: info.meta, unique_id: id_allocator.alloc(), + func_names: info.func_names, + func_name_data, }; ret.register_debug_and_profiling(profiler)?; @@ -391,6 +460,21 @@ impl CompiledModule { &self.funcs } + /// Looks up the `name` section name for the function index `idx`, if one + /// was specified in the original wasm module. + pub fn func_name(&self, idx: FuncIndex) -> Option<&str> { + // Find entry for `idx`, if present. + let i = self.func_names.binary_search_by_key(&idx, |n| n.idx).ok()?; + let name = &self.func_names[i]; + + // Here we `unwrap` the `from_utf8` but this can theoretically be a + // `from_utf8_unchecked` if we really wanted since this section is + // guaranteed to only have valid utf-8 data. Until it's a problem it's + // probably best to double-check this though. + let data = &self.mmap()[self.func_name_data.clone()]; + Some(str::from_utf8(&data[name.offset as usize..][..name.len as usize]).unwrap()) + } + /// Return a reference to a mutable module (if possible). pub fn module_mut(&mut self) -> Option<&mut Module> { Arc::get_mut(&mut self.module) diff --git a/crates/jit/src/profiling.rs b/crates/jit/src/profiling.rs index a45db568aa..e6fefcccdf 100644 --- a/crates/jit/src/profiling.rs +++ b/crates/jit/src/profiling.rs @@ -1,5 +1,5 @@ use crate::{demangling::demangle_function_name_or_index, CompiledModule}; -use wasmtime_environ::{DefinedFuncIndex, EntityRef, Module}; +use wasmtime_environ::{DefinedFuncIndex, EntityRef}; cfg_if::cfg_if! { if #[cfg(all(feature = "jitdump", target_os = "linux"))] { @@ -52,14 +52,10 @@ impl ProfilingAgent for NullProfilerAgent { } #[allow(dead_code)] -fn debug_name(module: &Module, index: DefinedFuncIndex) -> String { - let index = module.func_index(index); +fn debug_name(module: &CompiledModule, index: DefinedFuncIndex) -> String { + let index = module.module().func_index(index); let mut debug_name = String::new(); - demangle_function_name_or_index( - &mut debug_name, - module.func_names.get(&index).map(|s| s.as_str()), - index.index(), - ) - .unwrap(); + demangle_function_name_or_index(&mut debug_name, module.func_name(index), index.index()) + .unwrap(); debug_name } diff --git a/crates/jit/src/profiling/jitdump_linux.rs b/crates/jit/src/profiling/jitdump_linux.rs index 374b21b503..f96a4dff5e 100644 --- a/crates/jit/src/profiling/jitdump_linux.rs +++ b/crates/jit/src/profiling/jitdump_linux.rs @@ -302,7 +302,7 @@ impl State { } } else { let timestamp = self.get_time_stamp(); - let name = super::debug_name(module.module(), idx); + let name = super::debug_name(module, idx); self.dump_code_load_record(&name, addr, len, timestamp, pid, tid); } } diff --git a/crates/jit/src/profiling/vtune_linux.rs b/crates/jit/src/profiling/vtune_linux.rs index 9730e2e5ab..8f236ea4d1 100644 --- a/crates/jit/src/profiling/vtune_linux.rs +++ b/crates/jit/src/profiling/vtune_linux.rs @@ -131,7 +131,7 @@ impl State { for (idx, func) in module.finished_functions() { let (addr, len) = unsafe { ((*func).as_ptr().cast::(), (*func).len()) }; - let method_name = super::debug_name(module.module(), idx); + let method_name = super::debug_name(module, idx); let method_id = self.get_method_id(); log::trace!( "new function ({}) {:?}::{:?} @ {:?}\n", diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs index 60bbbfffd7..1b39748137 100644 --- a/crates/wasmtime/src/module/registry.rs +++ b/crates/wasmtime/src/module/registry.rs @@ -323,7 +323,7 @@ impl GlobalRegisteredModule { Some(FrameInfo { module_name: module.name.clone(), func_index: index.index() as u32, - func_name: module.func_names.get(&index).cloned(), + func_name: self.module.func_name(index).map(|s| s.to_string()), instr, func_start: info.start_srcloc, symbols,