Change how wasm DWARF is inserted into artifacts (#5358)
This commit fixes a bug with components by changing how DWARF information from a wasm binary is copied over to the final compiled artifact. Note that this is not the Wasmtime-generated DWARF but rather the native wasm DWARF itself, which is used in backtraces. Previously the wasm DWARF was inserted into sections named `.*.wasm`, where `*` was `debug_info`, `debug_str`, etc. -- one per `gimli::SectionId` found in the original wasm module. This does not work with components, however, where the modules' debug information was not correctly placed into separate sections or otherwise disambiguated. The fix in this commit is to instead concatenate all the debug information into one large section and store offsets into that section. This is similar to the `name`-section scraping or the trap metadata section, where one section contains all the data for all the modules in a component. This simplifies object file parsing, since only one section name needs to be looked for, and it does not add much complexity to serializing and looking up DWARF information.
This commit is contained in:
@@ -119,3 +119,12 @@ pub const ELF_WASMTIME_INFO: &'static str = ".wasmtime.info";
|
||||
/// sometimes quite large (3MB seen for spidermonkey-compiled-to-wasm), can be
|
||||
/// paged in lazily from an mmap and is never paged in if we never reference it.
|
||||
pub const ELF_NAME_DATA: &'static str = ".name.wasm";
|
||||
|
||||
/// This is the name of the section in the final ELF image that contains the
|
||||
/// concatenation of all the native DWARF information found in the original wasm
|
||||
/// files.
|
||||
///
|
||||
/// This concatenation is not intended to be read by external tools at this time
|
||||
/// and is instead indexed directly by relative indices stored in compilation
|
||||
/// metadata.
|
||||
pub const ELF_WASMTIME_DWARF: &str = ".wasmtime.dwarf";
|
||||
|
||||
@@ -32,10 +32,7 @@ pub struct CodeMemory {
|
||||
address_map_data: Range<usize>,
|
||||
func_name_data: Range<usize>,
|
||||
info_data: Range<usize>,
|
||||
|
||||
/// Map of dwarf sections indexed by `gimli::SectionId` which points to the
|
||||
/// range within `code_memory`'s mmap as to the contents of the section.
|
||||
dwarf_sections: Vec<Range<usize>>,
|
||||
dwarf: Range<usize>,
|
||||
}
|
||||
|
||||
impl Drop for CodeMemory {
|
||||
@@ -60,8 +57,6 @@ impl CodeMemory {
|
||||
/// The returned `CodeMemory` manages the internal `MmapVec` and the
|
||||
/// `publish` method is used to actually make the memory executable.
|
||||
pub fn new(mmap: MmapVec) -> Result<Self> {
|
||||
use gimli::SectionId::*;
|
||||
|
||||
let obj = File::parse(&mmap[..])
|
||||
.with_context(|| "failed to parse internal compilation artifact")?;
|
||||
|
||||
@@ -73,7 +68,7 @@ impl CodeMemory {
|
||||
let mut address_map_data = 0..0;
|
||||
let mut func_name_data = 0..0;
|
||||
let mut info_data = 0..0;
|
||||
let mut dwarf_sections = Vec::new();
|
||||
let mut dwarf = 0..0;
|
||||
for section in obj.sections() {
|
||||
let data = section.data()?;
|
||||
let name = section.name()?;
|
||||
@@ -90,14 +85,6 @@ impl CodeMemory {
|
||||
}
|
||||
}
|
||||
|
||||
let mut gimli = |id: gimli::SectionId| {
|
||||
let idx = id as usize;
|
||||
if dwarf_sections.len() <= idx {
|
||||
dwarf_sections.resize(idx + 1, 0..0);
|
||||
}
|
||||
dwarf_sections[idx] = range.clone();
|
||||
};
|
||||
|
||||
match name {
|
||||
obj::ELF_WASM_BTI => match data.len() {
|
||||
1 => enable_branch_protection = Some(data[0] != 0),
|
||||
@@ -118,31 +105,7 @@ impl CodeMemory {
|
||||
obj::ELF_WASMTIME_TRAPS => trap_data = range,
|
||||
obj::ELF_NAME_DATA => func_name_data = range,
|
||||
obj::ELF_WASMTIME_INFO => info_data = range,
|
||||
|
||||
// Register dwarf sections into the `dwarf_sections`
|
||||
// array which is indexed by `gimli::SectionId`
|
||||
".debug_abbrev.wasm" => gimli(DebugAbbrev),
|
||||
".debug_addr.wasm" => gimli(DebugAddr),
|
||||
".debug_aranges.wasm" => gimli(DebugAranges),
|
||||
".debug_frame.wasm" => gimli(DebugFrame),
|
||||
".eh_frame.wasm" => gimli(EhFrame),
|
||||
".eh_frame_hdr.wasm" => gimli(EhFrameHdr),
|
||||
".debug_info.wasm" => gimli(DebugInfo),
|
||||
".debug_line.wasm" => gimli(DebugLine),
|
||||
".debug_line_str.wasm" => gimli(DebugLineStr),
|
||||
".debug_loc.wasm" => gimli(DebugLoc),
|
||||
".debug_loc_lists.wasm" => gimli(DebugLocLists),
|
||||
".debug_macinfo.wasm" => gimli(DebugMacinfo),
|
||||
".debug_macro.wasm" => gimli(DebugMacro),
|
||||
".debug_pub_names.wasm" => gimli(DebugPubNames),
|
||||
".debug_pub_types.wasm" => gimli(DebugPubTypes),
|
||||
".debug_ranges.wasm" => gimli(DebugRanges),
|
||||
".debug_rng_lists.wasm" => gimli(DebugRngLists),
|
||||
".debug_str.wasm" => gimli(DebugStr),
|
||||
".debug_str_offsets.wasm" => gimli(DebugStrOffsets),
|
||||
".debug_types.wasm" => gimli(DebugTypes),
|
||||
".debug_cu_index.wasm" => gimli(DebugCuIndex),
|
||||
".debug_tu_index.wasm" => gimli(DebugTuIndex),
|
||||
obj::ELF_WASMTIME_DWARF => dwarf = range,
|
||||
|
||||
_ => log::debug!("ignoring section {name}"),
|
||||
}
|
||||
@@ -158,7 +121,7 @@ impl CodeMemory {
|
||||
trap_data,
|
||||
address_map_data,
|
||||
func_name_data,
|
||||
dwarf_sections,
|
||||
dwarf,
|
||||
info_data,
|
||||
wasm_data,
|
||||
})
|
||||
@@ -175,15 +138,9 @@ impl CodeMemory {
|
||||
&self.mmap[self.text.clone()]
|
||||
}
|
||||
|
||||
/// Returns the data in the corresponding dwarf section, or an empty slice
|
||||
/// if the section wasn't present.
|
||||
pub fn dwarf_section(&self, section: gimli::SectionId) -> &[u8] {
|
||||
let range = self
|
||||
.dwarf_sections
|
||||
.get(section as usize)
|
||||
.cloned()
|
||||
.unwrap_or(0..0);
|
||||
&self.mmap[range]
|
||||
/// Returns the contents of the `ELF_WASMTIME_DWARF` section.
|
||||
pub fn dwarf(&self) -> &[u8] {
|
||||
&self.mmap[self.dwarf.clone()]
|
||||
}
|
||||
|
||||
/// Returns the data in the `ELF_NAME_DATA` section.
|
||||
|
||||
@@ -71,6 +71,10 @@ struct Metadata {
|
||||
/// Note that even if this flag is `true` sections may be missing if they
|
||||
/// weren't found in the original wasm module itself.
|
||||
has_wasm_debuginfo: bool,
|
||||
|
||||
/// Dwarf sections and the offsets at which they're stored in the
|
||||
/// `ELF_WASMTIME_DWARF` section.
|
||||
dwarf: Vec<(u8, Range<u64>)>,
|
||||
}
|
||||
|
||||
/// Helper structure to create an ELF file as a compilation artifact.
|
||||
@@ -94,6 +98,12 @@ pub struct ObjectBuilder<'a> {
|
||||
///
|
||||
/// This is optional and lazily created on demand.
|
||||
names: Option<SectionId>,
|
||||
|
||||
/// The section identifier for dwarf information copied from the original
|
||||
/// wasm files.
|
||||
///
|
||||
/// This is optional and lazily created on demand.
|
||||
dwarf: Option<SectionId>,
|
||||
}
|
||||
|
||||
impl<'a> ObjectBuilder<'a> {
|
||||
@@ -109,6 +119,7 @@ impl<'a> ObjectBuilder<'a> {
|
||||
tunables,
|
||||
data,
|
||||
names: None,
|
||||
dwarf: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -239,18 +250,21 @@ impl<'a> ObjectBuilder<'a> {
|
||||
// requested. Note that this is distinct from the native debuginfo
|
||||
// possibly generated by the native compiler, hence these sections
|
||||
// getting wasm-specific names.
|
||||
let mut dwarf = Vec::new();
|
||||
if self.tunables.parse_wasm_debuginfo {
|
||||
self.push_debug(&debuginfo.dwarf.debug_abbrev);
|
||||
self.push_debug(&debuginfo.dwarf.debug_addr);
|
||||
self.push_debug(&debuginfo.dwarf.debug_aranges);
|
||||
self.push_debug(&debuginfo.dwarf.debug_info);
|
||||
self.push_debug(&debuginfo.dwarf.debug_line);
|
||||
self.push_debug(&debuginfo.dwarf.debug_line_str);
|
||||
self.push_debug(&debuginfo.dwarf.debug_str);
|
||||
self.push_debug(&debuginfo.dwarf.debug_str_offsets);
|
||||
self.push_debug(&debuginfo.debug_ranges);
|
||||
self.push_debug(&debuginfo.debug_rnglists);
|
||||
self.push_debug(&mut dwarf, &debuginfo.dwarf.debug_abbrev);
|
||||
self.push_debug(&mut dwarf, &debuginfo.dwarf.debug_addr);
|
||||
self.push_debug(&mut dwarf, &debuginfo.dwarf.debug_aranges);
|
||||
self.push_debug(&mut dwarf, &debuginfo.dwarf.debug_info);
|
||||
self.push_debug(&mut dwarf, &debuginfo.dwarf.debug_line);
|
||||
self.push_debug(&mut dwarf, &debuginfo.dwarf.debug_line_str);
|
||||
self.push_debug(&mut dwarf, &debuginfo.dwarf.debug_str);
|
||||
self.push_debug(&mut dwarf, &debuginfo.dwarf.debug_str_offsets);
|
||||
self.push_debug(&mut dwarf, &debuginfo.debug_ranges);
|
||||
self.push_debug(&mut dwarf, &debuginfo.debug_rnglists);
|
||||
}
|
||||
// Sort this for binary-search-lookup later in `symbolize_context`.
|
||||
dwarf.sort_by_key(|(id, _)| *id);
|
||||
|
||||
Ok(CompiledModuleInfo {
|
||||
module,
|
||||
@@ -262,11 +276,12 @@ impl<'a> ObjectBuilder<'a> {
|
||||
has_unparsed_debuginfo,
|
||||
code_section_offset: debuginfo.wasm_file.code_section_offset,
|
||||
has_wasm_debuginfo: self.tunables.parse_wasm_debuginfo,
|
||||
dwarf,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn push_debug<'b, T>(&mut self, section: &T)
|
||||
fn push_debug<'b, T>(&mut self, dwarf: &mut Vec<(u8, Range<u64>)>, section: &T)
|
||||
where
|
||||
T: gimli::Section<gimli::EndianSlice<'b, gimli::LittleEndian>>,
|
||||
{
|
||||
@@ -274,12 +289,15 @@ impl<'a> ObjectBuilder<'a> {
|
||||
if data.is_empty() {
|
||||
return;
|
||||
}
|
||||
let section_id = self.obj.add_section(
|
||||
let section_id = *self.dwarf.get_or_insert_with(|| {
|
||||
self.obj.add_section(
|
||||
self.obj.segment_name(StandardSegment::Debug).to_vec(),
|
||||
format!("{}.wasm", T::id().name()).into_bytes(),
|
||||
obj::ELF_WASMTIME_DWARF.as_bytes().to_vec(),
|
||||
SectionKind::Debug,
|
||||
);
|
||||
self.obj.append_section_data(section_id, data, 1);
|
||||
)
|
||||
});
|
||||
let offset = self.obj.append_section_data(section_id, data, 1);
|
||||
dwarf.push((T::id() as u8, offset..offset + data.len() as u64));
|
||||
}
|
||||
|
||||
/// Creates the `ELF_WASMTIME_INFO` section from the given serializable data
|
||||
@@ -591,7 +609,20 @@ impl CompiledModule {
|
||||
return Ok(None);
|
||||
}
|
||||
let dwarf = gimli::Dwarf::load(|id| -> Result<_> {
|
||||
let data = self.code_memory().dwarf_section(id);
|
||||
// Lookup the `id` in the `dwarf` array prepared for this module
|
||||
// during module serialization where it's sorted by the `id` key. If
|
||||
// found this is a range within the general module's concatenated
|
||||
// dwarf section which is extracted here, otherwise it's just an
|
||||
// empty list to represent that it's not present.
|
||||
let data = self
|
||||
.meta
|
||||
.dwarf
|
||||
.binary_search_by_key(&(id as u8), |(id, _)| *id)
|
||||
.map(|i| {
|
||||
let (_, range) = &self.meta.dwarf[i];
|
||||
&self.code_memory().dwarf()[range.start as usize..range.end as usize]
|
||||
})
|
||||
.unwrap_or(&[]);
|
||||
Ok(EndianSlice::new(data, gimli::LittleEndian))
|
||||
})?;
|
||||
let cx = addr2line::Context::from_dwarf(dwarf)
|
||||
|
||||
Reference in New Issue
Block a user