Move wasm data/debuginfo into the ELF compilation image (#3235)

* Move wasm data/debuginfo into the ELF compilation image

This commit moves existing allocations of `Box<[u8]>` stored separately
from compilation's final ELF image into the ELF image itself. The goal
of this commit is to reduce the amount of data which `bincode` will need
to process in the future. DWARF debugging information and wasm data
segments can be quite large, and they're relatively rarely read, so
there's typically no need to copy them around. Instead by moving them
into the ELF image this opens up the opportunity in the future to
eliminate copies and use data directly as-found in the image itself.

For information accessed possibly-multiple times, such as the wasm data
ranges, the indexes of the data within the ELF image are computed when
a `CompiledModule` is created. These indexes are then used to directly
index into the image without having to root around in the ELF file each
time they're accessed.

One other change located here is that the symbolication context
previously cloned the debug information into it to adhere to the
`'static` lifetime safely, but this isn't actually ever used in
`wasmtime` right now so the unsafety around this has been removed and
instead borrowed data is returned (no more clones, yay!).

* Fix lightbeam
This commit is contained in:
Alex Crichton
2021-08-25 09:03:07 -05:00
committed by GitHub
parent a662f5361d
commit 7d05ebe7ff
12 changed files with 273 additions and 222 deletions

View File

@@ -6,15 +6,18 @@
use crate::code_memory::CodeMemory;
use crate::debug::create_gdbjit_image;
use crate::link::link_module;
use anyhow::Result;
use anyhow::{anyhow, Context, Result};
use object::read::File;
use object::write::{Object, StandardSegment};
use object::{Object as _, ObjectSection, SectionKind};
use serde::{Deserialize, Serialize};
use std::ops::Range;
use std::sync::Arc;
use thiserror::Error;
use wasmtime_environ::{
CompileError, DebugInfoData, DefinedFuncIndex, FunctionInfo, InstanceSignature,
InstanceTypeIndex, Module, ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap,
SignatureIndex, StackMapInformation, Tunables, WasmFuncType,
CompileError, DefinedFuncIndex, FunctionInfo, InstanceSignature, InstanceTypeIndex, Module,
ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap, SignatureIndex,
StackMapInformation, Tunables, WasmFuncType,
};
use wasmtime_profiling::ProfilingAgent;
use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline};
@@ -51,9 +54,6 @@ pub struct CompilationArtifacts {
/// ELF image with functions code.
obj: Box<[u8]>,
/// All data segments referenced by this module, both active and passive.
wasm_data: Box<[u8]>,
/// Descriptions of compiled functions
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
@@ -64,25 +64,10 @@ pub struct CompilationArtifacts {
/// we skipped and did not parse.
has_unparsed_debuginfo: bool,
/// Debug information found in the wasm file, used for symbolicating
/// backtraces.
debug_info: Option<DebugInfo>,
}
#[derive(Serialize, Deserialize)]
struct DebugInfo {
data: Box<[u8]>,
/// Offset in the original wasm file to the code section.
code_section_offset: u64,
debug_abbrev: Range<usize>,
debug_addr: Range<usize>,
debug_aranges: Range<usize>,
debug_info: Range<usize>,
debug_line: Range<usize>,
debug_line_str: Range<usize>,
debug_ranges: Range<usize>,
debug_rnglists: Range<usize>,
debug_str: Range<usize>,
debug_str_offsets: Range<usize>,
has_wasm_debuginfo: bool,
}
impl CompilationArtifacts {
@@ -90,10 +75,10 @@ impl CompilationArtifacts {
/// compilation.
pub fn new(
translation: ModuleTranslation<'_>,
obj: Vec<u8>,
mut obj: Object,
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
tunables: &Tunables,
) -> CompilationArtifacts {
) -> Result<CompilationArtifacts> {
let ModuleTranslation {
mut module,
debuginfo,
@@ -103,45 +88,67 @@ impl CompilationArtifacts {
..
} = translation;
// Concatenate all the wasm data together, placing both active and
// passive data into the same chunk of data. Note that this
// implementation doesn't allow for unmapping or somehow releasing
// passive data on `data.drop`, and if we want to do that in the future
// we'll have to change this to store passive data segments separately
// from the main data segments.
//
// Also note that here we have to update all passive data segments and
// their relative indices.
let wasm_data_size = data
.iter()
.map(|s| s.len())
.chain(passive_data.iter().map(|s| s.len()))
.sum();
let mut wasm_data = Vec::with_capacity(wasm_data_size);
// Place all data from the wasm module into a section which will be the
// source of the data later at runtime.
let segment = obj.segment_name(StandardSegment::Data).to_vec();
let section_id = obj.add_section(segment, b".wasmdata".to_vec(), SectionKind::ReadOnlyData);
let mut total_data_len = 0;
for data in data.iter() {
wasm_data.extend_from_slice(data);
obj.append_section_data(section_id, data, 1);
total_data_len += data.len();
}
let total_data_len = wasm_data.len();
for data in passive_data.iter() {
wasm_data.extend_from_slice(data);
obj.append_section_data(section_id, data, 1);
}
// Update passive data offsets since they're all located after the other
// data in the module.
for (_, range) in module.passive_data_map.iter_mut() {
range.start = range.start.checked_add(total_data_len as u32).unwrap();
range.end = range.end.checked_add(total_data_len as u32).unwrap();
}
CompilationArtifacts {
// Insert the raw wasm-based debuginfo into the output, if
// requested. Note that this is distinct from the native debuginfo
// possibly generated by the native compiler, hence these sections
// getting wasm-specific names.
if tunables.parse_wasm_debuginfo {
push_debug(&mut obj, &debuginfo.dwarf.debug_abbrev);
push_debug(&mut obj, &debuginfo.dwarf.debug_addr);
push_debug(&mut obj, &debuginfo.dwarf.debug_aranges);
push_debug(&mut obj, &debuginfo.dwarf.debug_info);
push_debug(&mut obj, &debuginfo.dwarf.debug_line);
push_debug(&mut obj, &debuginfo.dwarf.debug_line_str);
push_debug(&mut obj, &debuginfo.dwarf.debug_str);
push_debug(&mut obj, &debuginfo.dwarf.debug_str_offsets);
push_debug(&mut obj, &debuginfo.debug_ranges);
push_debug(&mut obj, &debuginfo.debug_rnglists);
}
return Ok(CompilationArtifacts {
module: Arc::new(module),
obj: obj.into_boxed_slice(),
wasm_data: wasm_data.into(),
obj: obj.write()?.into(),
funcs,
native_debug_info_present: tunables.generate_native_debuginfo,
debug_info: if tunables.parse_wasm_debuginfo {
Some(debuginfo.into())
} else {
None
},
has_unparsed_debuginfo,
code_section_offset: debuginfo.wasm_file.code_section_offset,
has_wasm_debuginfo: tunables.parse_wasm_debuginfo,
});
fn push_debug<'a, T>(obj: &mut Object, section: &T)
where
T: gimli::Section<gimli::EndianSlice<'a, gimli::LittleEndian>>,
{
if section.reader().slice().is_empty() {
return;
}
let segment = obj.segment_name(StandardSegment::Debug).to_vec();
let section_id = obj.add_section(
segment,
wasm_section_name(T::id()).as_bytes().to_vec(),
SectionKind::Debug,
);
obj.append_section_data(section_id, section.reader().slice(), 1);
}
}
}
@@ -178,6 +185,7 @@ impl ModuleCode {
/// A compiled wasm module, ready to be instantiated.
pub struct CompiledModule {
wasm_data: Range<usize>,
artifacts: CompilationArtifacts,
code: Arc<ModuleCode>,
finished_functions: FinishedFunctions,
@@ -189,11 +197,14 @@ impl CompiledModule {
pub fn from_artifacts(
artifacts: CompilationArtifacts,
profiler: &dyn ProfilingAgent,
) -> Result<Arc<Self>, SetupError> {
) -> Result<Arc<Self>> {
let obj = File::parse(&artifacts.obj[..])
.with_context(|| "failed to parse internal ELF compilation artifact")?;
// Allocate all of the compiled functions into executable memory,
// copying over their contents.
let (code_memory, code_range, finished_functions, trampolines) =
build_code_memory(&artifacts.obj, &artifacts.module).map_err(|message| {
build_code_memory(&obj, &artifacts.module).map_err(|message| {
SetupError::Instantiate(InstantiationError::Resource(anyhow::anyhow!(
"failed to build code memory for functions: {}",
message
@@ -220,8 +231,14 @@ impl CompiledModule {
let start = code_range.0 as usize;
let end = start + code_range.1;
let data = obj
.section_by_name(".wasmdata")
.ok_or_else(|| anyhow!("failed to find internal data section for wasm module"))?;
let wasm_data = subslice_range(data.data()?, &artifacts.obj);
Ok(Arc::new(Self {
artifacts,
wasm_data,
code: Arc::new(ModuleCode {
range: (start, end),
code_memory,
@@ -243,7 +260,7 @@ impl CompiledModule {
/// This is used for initialization of memories and all data ranges stored
/// in a `Module` are relative to the slice returned here.
pub fn wasm_data(&self) -> &[u8] {
&self.artifacts.wasm_data
&self.artifacts.obj[self.wasm_data.clone()]
}
/// Return a reference-counting pointer to a module.
@@ -338,38 +355,25 @@ impl CompiledModule {
///
/// Basically this makes a thing which parses debuginfo and can tell you
/// what filename and line number a wasm pc comes from.
pub fn symbolize_context(&self) -> Result<Option<SymbolizeContext>, gimli::Error> {
pub fn symbolize_context(&self) -> Result<Option<SymbolizeContext<'_>>> {
use gimli::EndianSlice;
let info = match &self.artifacts.debug_info {
Some(info) => info,
None => return Ok(None),
};
// For now we clone the data into the `SymbolizeContext`, but if this
// becomes prohibitive we could always `Arc` it with our own allocation
// here.
let data = info.data.clone();
let endian = gimli::LittleEndian;
let cx = addr2line::Context::from_sections(
EndianSlice::new(&data[info.debug_abbrev.clone()], endian).into(),
EndianSlice::new(&data[info.debug_addr.clone()], endian).into(),
EndianSlice::new(&data[info.debug_aranges.clone()], endian).into(),
EndianSlice::new(&data[info.debug_info.clone()], endian).into(),
EndianSlice::new(&data[info.debug_line.clone()], endian).into(),
EndianSlice::new(&data[info.debug_line_str.clone()], endian).into(),
EndianSlice::new(&data[info.debug_ranges.clone()], endian).into(),
EndianSlice::new(&data[info.debug_rnglists.clone()], endian).into(),
EndianSlice::new(&data[info.debug_str.clone()], endian).into(),
EndianSlice::new(&data[info.debug_str_offsets.clone()], endian).into(),
EndianSlice::new(&[], endian),
)?;
if !self.artifacts.has_wasm_debuginfo {
return Ok(None);
}
let obj = File::parse(&self.artifacts.obj[..])
.context("failed to parse internal ELF file representation")?;
let dwarf = gimli::Dwarf::load(|id| -> Result<_> {
let data = obj
.section_by_name(wasm_section_name(id))
.and_then(|s| s.data().ok())
.unwrap_or(&[]);
Ok(EndianSlice::new(data, gimli::LittleEndian))
})?;
let cx = addr2line::Context::from_dwarf(dwarf)
.context("failed to create addr2line dwarf mapping context")?;
Ok(Some(SymbolizeContext {
// See comments on `SymbolizeContext` for why we do this static
// lifetime promotion.
inner: unsafe {
std::mem::transmute::<Addr2LineContext<'_>, Addr2LineContext<'static>>(cx)
},
code_section_offset: info.code_section_offset,
_data: data,
inner: cx,
code_section_offset: self.artifacts.code_section_offset,
}))
}
@@ -384,27 +388,16 @@ type Addr2LineContext<'a> = addr2line::Context<gimli::EndianSlice<'a, gimli::Lit
/// A context which contains dwarf debug information to translate program
/// counters back to filenames and line numbers.
pub struct SymbolizeContext {
// Note the `'static` lifetime on `inner`. That's actually a bunch of slices
// which point back into the `_data` field. We currently unsafely manage
// this by saying that when inside the struct it's `'static` (since we own
// the referenced data just next to it) and we only loan out borrowed
// references.
_data: Box<[u8]>,
inner: Addr2LineContext<'static>,
pub struct SymbolizeContext<'a> {
inner: Addr2LineContext<'a>,
code_section_offset: u64,
}
impl SymbolizeContext {
impl<'a> SymbolizeContext<'a> {
/// Returns access to the [`addr2line::Context`] which can be used to query
/// frame information with.
pub fn addr2line(&self) -> &Addr2LineContext<'_> {
// Here we demote our synthetic `'static` lifetime which doesn't
// actually exist back to a lifetime that's tied to `&self`, which
// should be safe.
unsafe {
std::mem::transmute::<&Addr2LineContext<'static>, &Addr2LineContext<'_>>(&self.inner)
}
pub fn addr2line(&self) -> &Addr2LineContext<'a> {
&self.inner
}
/// Returns the offset of the code section in the original wasm file, used
@@ -429,7 +422,7 @@ fn create_dbg_image(
}
fn build_code_memory(
obj: &[u8],
obj: &File,
module: &Module,
) -> Result<(
CodeMemory,
@@ -469,7 +462,7 @@ fn build_code_memory(
trampolines.push((i, fnptr));
}
link_module(&allocation.obj, allocation.code_range);
link_module(obj, allocation.code_range);
let code_range = (allocation.code_range.as_ptr(), allocation.code_range.len());
@@ -479,42 +472,6 @@ fn build_code_memory(
Ok((code_memory, code_range, finished_functions, trampolines))
}
impl From<DebugInfoData<'_>> for DebugInfo {
fn from(raw: DebugInfoData<'_>) -> DebugInfo {
use gimli::Section;
let mut data = Vec::new();
let mut push = |section: &[u8]| {
data.extend_from_slice(section);
data.len() - section.len()..data.len()
};
let debug_abbrev = push(raw.dwarf.debug_abbrev.reader().slice());
let debug_addr = push(raw.dwarf.debug_addr.reader().slice());
let debug_aranges = push(raw.dwarf.debug_aranges.reader().slice());
let debug_info = push(raw.dwarf.debug_info.reader().slice());
let debug_line = push(raw.dwarf.debug_line.reader().slice());
let debug_line_str = push(raw.dwarf.debug_line_str.reader().slice());
let debug_ranges = push(raw.debug_ranges.reader().slice());
let debug_rnglists = push(raw.debug_rnglists.reader().slice());
let debug_str = push(raw.dwarf.debug_str.reader().slice());
let debug_str_offsets = push(raw.dwarf.debug_str_offsets.reader().slice());
DebugInfo {
data: data.into(),
debug_abbrev,
debug_addr,
debug_aranges,
debug_info,
debug_line,
debug_line_str,
debug_ranges,
debug_rnglists,
debug_str,
debug_str_offsets,
code_section_offset: raw.wasm_file.code_section_offset,
}
}
}
mod arc_serde {
use super::Arc;
use serde::{de::Deserialize, ser::Serialize, Deserializer, Serializer};
@@ -535,3 +492,52 @@ mod arc_serde {
Ok(Arc::new(T::deserialize(de)?))
}
}
/// Computes where `inner` sits inside `outer`, as an index range `r` for
/// which `outer[r]` covers the same bytes as `inner`.
///
/// An empty `inner` always maps to `0..0`, regardless of where it points.
///
/// # Panics
///
/// Panics if a non-empty `inner` does not lie entirely within `outer`.
fn subslice_range(inner: &[u8], outer: &[u8]) -> Range<usize> {
    if inner.is_empty() {
        return 0..0;
    }

    // Compare the slices by address. Elements are single bytes, so a slice's
    // length is also its extent in address space.
    let outer_start = outer.as_ptr() as usize;
    let inner_start = inner.as_ptr() as usize;
    assert!(outer_start <= inner_start);
    assert!(inner_start + inner.len() <= outer_start + outer.len());

    let offset = inner_start - outer_start;
    offset..offset + inner.len()
}
/// Returns the Wasmtime-specific section name for dwarf debugging sections.
///
/// These sections, if configured in Wasmtime, will contain the original raw
/// dwarf debugging information found in the wasm file, unmodified. These tables
/// are then consulted later to convert wasm program counters to original wasm
/// source filenames/line numbers with `addr2line`.
fn wasm_section_name(id: gimli::SectionId) -> &'static str {
use gimli::SectionId::*;
match id {
DebugAbbrev => ".debug_abbrev.wasm",
DebugAddr => ".debug_addr.wasm",
DebugAranges => ".debug_aranges.wasm",
DebugFrame => ".debug_frame.wasm",
EhFrame => ".eh_frame.wasm",
EhFrameHdr => ".eh_frame_hdr.wasm",
DebugInfo => ".debug_info.wasm",
DebugLine => ".debug_line.wasm",
DebugLineStr => ".debug_line_str.wasm",
DebugLoc => ".debug_loc.wasm",
DebugLocLists => ".debug_loc_lists.wasm",
DebugMacinfo => ".debug_macinfo.wasm",
DebugMacro => ".debug_macro.wasm",
DebugPubNames => ".debug_pub_names.wasm",
DebugPubTypes => ".debug_pub_types.wasm",
DebugRanges => ".debug_ranges.wasm",
DebugRngLists => ".debug_rng_lists.wasm",
DebugStr => ".debug_str.wasm",
DebugStrOffsets => ".debug_str_offsets.wasm",
DebugTypes => ".debug_types.wasm",
}
}