Reduce calls to section_by_name loading artifacts (#5151)

* Reduce calls to `section_by_name` loading artifacts

Data is stored in binary artifacts as an ELF object, and when loading an
artifact lots of calls are made to the `object` crate's
`section_by_name` method, which ends up doing a linear search through the
list of sections for a particular name. To avoid doing this linear
search every time, I've replaced these calls (or at least most of them)
with a single loop over the sections of an object that runs once at the
beginning, when the object is loaded.

This isn't really a pressing issue today, but some upcoming work I hope
to do for AOT-compiled components will add more sections to the
artifact, so it seems best to keep the number of linear searches small
and to avoid them where possible.

* Fix an off-by-one
This commit is contained in:
Alex Crichton
2022-10-28 17:55:34 -05:00
committed by GitHub
parent 835abbcd11
commit 81f7ef7fbe

View File

@@ -287,7 +287,7 @@ pub fn finish_compile(
} }
let section_id = obj.add_section( let section_id = obj.add_section(
obj.segment_name(StandardSegment::Debug).to_vec(), obj.segment_name(StandardSegment::Debug).to_vec(),
wasm_section_name(T::id()).as_bytes().to_vec(), format!("{}.wasm", T::id().name()).into_bytes(),
SectionKind::Debug, SectionKind::Debug,
); );
obj.append_section_data(section_id, data, 1); obj.append_section_data(section_id, data, 1);
@@ -378,6 +378,9 @@ pub struct CompiledModule {
unique_id: CompiledModuleId, unique_id: CompiledModuleId,
func_names: Vec<FunctionName>, func_names: Vec<FunctionName>,
func_name_data: Range<usize>, func_name_data: Range<usize>,
/// Map of dwarf sections indexed by `gimli::SectionId` which points to the
/// range within `code_memory`'s mmap as to the contents of the section.
dwarf_sections: Vec<Range<usize>>,
} }
impl CompiledModule { impl CompiledModule {
@@ -399,43 +402,102 @@ impl CompiledModule {
/// about new code that is loaded. /// about new code that is loaded.
pub fn from_artifacts( pub fn from_artifacts(
mmap: MmapVec, mmap: MmapVec,
info: Option<CompiledModuleInfo>, mut info: Option<CompiledModuleInfo>,
profiler: &dyn ProfilingAgent, profiler: &dyn ProfilingAgent,
id_allocator: &CompiledModuleIdAllocator, id_allocator: &CompiledModuleIdAllocator,
) -> Result<Self> { ) -> Result<Self> {
let obj = File::parse(&mmap[..]).context("failed to parse internal elf file")?; use gimli::SectionId::*;
let opt_section = |name: &str| obj.section_by_name(name).and_then(|s| s.data().ok());
let section = |name: &str| {
opt_section(name)
.ok_or_else(|| anyhow!("missing section `{}` in compilation artifacts", name))
};
// Acquire the `CompiledModuleInfo`, either because it was passed in or // Parse the `code_memory` as an object file and extract information
// by deserializing it from the compiliation image. // about where all of its sections are located, stored into the
let info = match info { // `CompiledModule` created here.
Some(info) => info, //
None => bincode::deserialize(section(ELF_WASMTIME_INFO)?) // Note that dwarf sections here specifically are those that are carried
.context("failed to deserialize wasmtime module info")?, // over directly from the original wasm module's dwarf sections, not the
}; // wasmtime-generated host DWARF sections.
let obj = File::parse(&mmap[..]).context("failed to parse internal elf file")?;
let mut wasm_data = None;
let mut address_map_data = None;
let mut func_name_data = None;
let mut trap_data = None;
let mut code = None;
let mut dwarf_sections = Vec::new();
for section in obj.sections() {
let name = section.name()?;
let data = section.data()?;
let range = subslice_range(data, &mmap);
let mut gimli = |id: gimli::SectionId| {
let idx = id as usize;
if dwarf_sections.len() <= idx {
dwarf_sections.resize(idx + 1, 0..0);
}
dwarf_sections[idx] = range.clone();
};
match name {
ELF_WASM_DATA => wasm_data = Some(range),
ELF_WASMTIME_ADDRMAP => address_map_data = Some(range),
ELF_WASMTIME_TRAPS => trap_data = Some(range),
ELF_NAME_DATA => func_name_data = Some(range),
".text" => code = Some(range),
// Parse the metadata if it's not already available
// in-memory.
ELF_WASMTIME_INFO => {
if info.is_none() {
info = Some(
bincode::deserialize(data)
.context("failed to deserialize wasmtime module info")?,
);
}
}
// Register dwarf sections into the `dwarf_sections`
// array which is indexed by `gimli::SectionId`
".debug_abbrev.wasm" => gimli(DebugAbbrev),
".debug_addr.wasm" => gimli(DebugAddr),
".debug_aranges.wasm" => gimli(DebugAranges),
".debug_frame.wasm" => gimli(DebugFrame),
".eh_frame.wasm" => gimli(EhFrame),
".eh_frame_hdr.wasm" => gimli(EhFrameHdr),
".debug_info.wasm" => gimli(DebugInfo),
".debug_line.wasm" => gimli(DebugLine),
".debug_line_str.wasm" => gimli(DebugLineStr),
".debug_loc.wasm" => gimli(DebugLoc),
".debug_loc_lists.wasm" => gimli(DebugLocLists),
".debug_macinfo.wasm" => gimli(DebugMacinfo),
".debug_macro.wasm" => gimli(DebugMacro),
".debug_pub_names.wasm" => gimli(DebugPubNames),
".debug_pub_types.wasm" => gimli(DebugPubTypes),
".debug_ranges.wasm" => gimli(DebugRanges),
".debug_rng_lists.wasm" => gimli(DebugRngLists),
".debug_str.wasm" => gimli(DebugStr),
".debug_str_offsets.wasm" => gimli(DebugStrOffsets),
".debug_types.wasm" => gimli(DebugTypes),
".debug_cu_index.wasm" => gimli(DebugCuIndex),
".debug_tu_index.wasm" => gimli(DebugTuIndex),
_ => log::debug!("ignoring section {name}"),
}
}
let info = info.ok_or_else(|| anyhow!("failed to find wasm info section"))?;
let mut ret = Self { let mut ret = Self {
module: Arc::new(info.module), module: Arc::new(info.module),
funcs: info.funcs, funcs: info.funcs,
trampolines: info.trampolines, trampolines: info.trampolines,
wasm_data: subslice_range(section(ELF_WASM_DATA)?, &mmap), wasm_data: wasm_data.ok_or_else(|| anyhow!("missing wasm data section"))?,
address_map_data: opt_section(ELF_WASMTIME_ADDRMAP) address_map_data: address_map_data.unwrap_or(0..0),
.map(|slice| subslice_range(slice, &mmap)) func_name_data: func_name_data.unwrap_or(0..0),
.unwrap_or(0..0), trap_data: trap_data.ok_or_else(|| anyhow!("missing trap data section"))?,
func_name_data: opt_section(ELF_NAME_DATA) code: code.ok_or_else(|| anyhow!("missing code section"))?,
.map(|slice| subslice_range(slice, &mmap))
.unwrap_or(0..0),
trap_data: subslice_range(section(ELF_WASMTIME_TRAPS)?, &mmap),
code: subslice_range(section(".text")?, &mmap),
dbg_jit_registration: None, dbg_jit_registration: None,
code_memory: CodeMemory::new(mmap), code_memory: CodeMemory::new(mmap),
meta: info.meta, meta: info.meta,
unique_id: id_allocator.alloc(), unique_id: id_allocator.alloc(),
func_names: info.func_names, func_names: info.func_names,
dwarf_sections,
}; };
ret.code_memory ret.code_memory
.publish(ret.meta.is_branch_protection_enabled) .publish(ret.meta.is_branch_protection_enabled)
@@ -623,13 +685,13 @@ impl CompiledModule {
if !self.meta.has_wasm_debuginfo { if !self.meta.has_wasm_debuginfo {
return Ok(None); return Ok(None);
} }
let obj = File::parse(&self.mmap()[..])
.context("failed to parse internal ELF file representation")?;
let dwarf = gimli::Dwarf::load(|id| -> Result<_> { let dwarf = gimli::Dwarf::load(|id| -> Result<_> {
let data = obj let range = self
.section_by_name(wasm_section_name(id)) .dwarf_sections
.and_then(|s| s.data().ok()) .get(id as usize)
.unwrap_or(&[]); .cloned()
.unwrap_or(0..0);
let data = &self.mmap()[range];
Ok(EndianSlice::new(data, gimli::LittleEndian)) Ok(EndianSlice::new(data, gimli::LittleEndian))
})?; })?;
let cx = addr2line::Context::from_dwarf(dwarf) let cx = addr2line::Context::from_dwarf(dwarf)
@@ -703,37 +765,3 @@ pub fn subslice_range(inner: &[u8], outer: &[u8]) -> Range<usize> {
let start = inner.as_ptr() as usize - outer.as_ptr() as usize; let start = inner.as_ptr() as usize - outer.as_ptr() as usize;
start..start + inner.len() start..start + inner.len()
} }
/// Returns the Wasmtime-specific section name for dwarf debugging sections.
///
/// These sections, if configured in Wasmtime, will contain the original raw
/// dwarf debugging information found in the wasm file, unmodified. These tables
/// are then consulted later to convert wasm program counters to original wasm
/// source filenames/line numbers with `addr2line`.
fn wasm_section_name(id: gimli::SectionId) -> &'static str {
use gimli::SectionId::*;
match id {
DebugAbbrev => ".debug_abbrev.wasm",
DebugAddr => ".debug_addr.wasm",
DebugAranges => ".debug_aranges.wasm",
DebugFrame => ".debug_frame.wasm",
EhFrame => ".eh_frame.wasm",
EhFrameHdr => ".eh_frame_hdr.wasm",
DebugInfo => ".debug_info.wasm",
DebugLine => ".debug_line.wasm",
DebugLineStr => ".debug_line_str.wasm",
DebugLoc => ".debug_loc.wasm",
DebugLocLists => ".debug_loc_lists.wasm",
DebugMacinfo => ".debug_macinfo.wasm",
DebugMacro => ".debug_macro.wasm",
DebugPubNames => ".debug_pub_names.wasm",
DebugPubTypes => ".debug_pub_types.wasm",
DebugRanges => ".debug_ranges.wasm",
DebugRngLists => ".debug_rng_lists.wasm",
DebugStr => ".debug_str.wasm",
DebugStrOffsets => ".debug_str_offsets.wasm",
DebugTypes => ".debug_types.wasm",
DebugCuIndex => ".debug_cu_index.wasm",
DebugTuIndex => ".debug_tu_index.wasm",
}
}