Remove some allocations in CodeMemory (#3253)
* Remove some allocations in `CodeMemory`. This commit removes the `FinishedFunctions` type as well as allocations associated with trampolines when allocating inside of a `CodeMemory`. The main goal of this commit is to reduce the time spent in `CodeMemory`, where today a good portion of time is spent simply parsing symbol names and trying to extract function indices from them. Instead, this commit implements a new strategy (different from #3236) where compilation records offset/length information for all functions/trampolines so this doesn't need to be re-learned from the object file later. A consequence of this commit is that this offset information will be decoded/encoded through `bincode` unconditionally, but we can optimize that later if necessary. Internally this involved quite a bit of refactoring since the previous map for `FinishedFunctions` was relatively heavily relied upon. * comments
This commit is contained in:
@@ -2,12 +2,9 @@
|
||||
|
||||
use crate::unwind::UnwindRegistration;
|
||||
use anyhow::{Context, Result};
|
||||
use object::read::{File as ObjectFile, Object, ObjectSection, ObjectSymbol};
|
||||
use std::collections::BTreeMap;
|
||||
use object::read::{File as ObjectFile, Object, ObjectSection};
|
||||
use std::mem::ManuallyDrop;
|
||||
use wasmtime_environ::obj::{try_parse_func_name, try_parse_trampoline_name};
|
||||
use wasmtime_environ::{FuncIndex, SignatureIndex};
|
||||
use wasmtime_runtime::{Mmap, VMFunctionBody};
|
||||
use wasmtime_runtime::Mmap;
|
||||
|
||||
struct CodeMemoryEntry {
|
||||
mmap: ManuallyDrop<Mmap>,
|
||||
@@ -38,42 +35,6 @@ impl Drop for CodeMemoryEntry {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CodeMemoryObjectAllocation<'a> {
|
||||
pub code_range: &'a mut [u8],
|
||||
funcs: BTreeMap<FuncIndex, (usize, usize)>,
|
||||
trampolines: BTreeMap<SignatureIndex, (usize, usize)>,
|
||||
}
|
||||
|
||||
impl<'a> CodeMemoryObjectAllocation<'a> {
|
||||
pub fn funcs_len(&self) -> usize {
|
||||
self.funcs.len()
|
||||
}
|
||||
|
||||
pub fn trampolines_len(&self) -> usize {
|
||||
self.trampolines.len()
|
||||
}
|
||||
|
||||
pub fn funcs(&'a self) -> impl Iterator<Item = (FuncIndex, &'a mut [VMFunctionBody])> + 'a {
|
||||
let buf = self.code_range as *const _ as *mut [u8];
|
||||
self.funcs.iter().map(move |(i, (start, len))| {
|
||||
(*i, unsafe {
|
||||
CodeMemory::view_as_mut_vmfunc_slice(&mut (*buf)[*start..*start + *len])
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
pub fn trampolines(
|
||||
&'a self,
|
||||
) -> impl Iterator<Item = (SignatureIndex, &'a mut [VMFunctionBody])> + 'a {
|
||||
let buf = self.code_range as *const _ as *mut [u8];
|
||||
self.trampolines.iter().map(move |(i, (start, len))| {
|
||||
(*i, unsafe {
|
||||
CodeMemory::view_as_mut_vmfunc_slice(&mut (*buf)[*start..*start + *len])
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory manager for executable code.
|
||||
pub struct CodeMemory {
|
||||
entries: Vec<CodeMemoryEntry>,
|
||||
@@ -132,39 +93,25 @@ impl CodeMemory {
|
||||
self.published = self.entries.len();
|
||||
}
|
||||
|
||||
/// Convert mut a slice from u8 to VMFunctionBody.
|
||||
fn view_as_mut_vmfunc_slice(slice: &mut [u8]) -> &mut [VMFunctionBody] {
|
||||
let byte_ptr: *mut [u8] = slice;
|
||||
let body_ptr = byte_ptr as *mut [VMFunctionBody];
|
||||
unsafe { &mut *body_ptr }
|
||||
}
|
||||
|
||||
/// Alternative to `allocate_for_object`, but when the object file isn't
|
||||
/// already parsed.
|
||||
pub fn allocate_for_object_unparsed<'a>(
|
||||
pub fn allocate_for_object_unparsed<'a, 'b>(
|
||||
&'a mut self,
|
||||
obj: &[u8],
|
||||
) -> Result<CodeMemoryObjectAllocation<'a>> {
|
||||
obj: &'b [u8],
|
||||
) -> Result<(&'a mut [u8], ObjectFile<'b>)> {
|
||||
let obj = ObjectFile::parse(obj)?;
|
||||
self.allocate_for_object(&obj)
|
||||
Ok((self.allocate_for_object(&obj)?, obj))
|
||||
}
|
||||
|
||||
/// Allocates and copies the ELF image code section into CodeMemory.
|
||||
/// Returns references to functions and trampolines defined there.
|
||||
pub fn allocate_for_object<'a>(
|
||||
&'a mut self,
|
||||
obj: &ObjectFile,
|
||||
) -> Result<CodeMemoryObjectAllocation<'a>> {
|
||||
pub fn allocate_for_object(&mut self, obj: &ObjectFile) -> Result<&mut [u8]> {
|
||||
let text_section = obj.section_by_name(".text").unwrap();
|
||||
let text_section_size = text_section.size() as usize;
|
||||
|
||||
if text_section_size == 0 {
|
||||
// No code in the image.
|
||||
return Ok(CodeMemoryObjectAllocation {
|
||||
code_range: &mut [],
|
||||
funcs: BTreeMap::new(),
|
||||
trampolines: BTreeMap::new(),
|
||||
});
|
||||
return Ok(&mut []);
|
||||
}
|
||||
|
||||
// Find the platform-specific unwind section, if present, which contains
|
||||
@@ -195,29 +142,6 @@ impl CodeMemory {
|
||||
);
|
||||
}
|
||||
|
||||
// Track locations of all defined functions and trampolines.
|
||||
let mut funcs = BTreeMap::new();
|
||||
let mut trampolines = BTreeMap::new();
|
||||
for sym in obj.symbols() {
|
||||
match sym.name() {
|
||||
Ok(name) => {
|
||||
if let Some(index) = try_parse_func_name(name) {
|
||||
let is_import = sym.section_index().is_none();
|
||||
if !is_import {
|
||||
funcs.insert(index, (sym.address() as usize, sym.size() as usize));
|
||||
}
|
||||
} else if let Some(index) = try_parse_trampoline_name(name) {
|
||||
trampolines.insert(index, (sym.address() as usize, sym.size() as usize));
|
||||
}
|
||||
}
|
||||
Err(_) => (),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(CodeMemoryObjectAllocation {
|
||||
code_range: &mut entry.mmap.as_mut_slice()[..text_section_size],
|
||||
funcs,
|
||||
trampolines,
|
||||
})
|
||||
Ok(&mut entry.mmap.as_mut_slice()[..text_section_size])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,9 +27,6 @@ pub fn create_gdbjit_image(
|
||||
}
|
||||
}
|
||||
|
||||
// let mut file = ::std::fs::File::create(::std::path::Path::new("test.o")).expect("file");
|
||||
// ::std::io::Write::write_all(&mut file, &bytes).expect("write");
|
||||
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
|
||||
@@ -8,9 +8,8 @@ use crate::debug::create_gdbjit_image;
|
||||
use crate::link::link_module;
|
||||
use crate::{MmapVec, ProfilingAgent};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use object::read::File;
|
||||
use object::write::{Object, StandardSegment};
|
||||
use object::{Object as _, ObjectSection, SectionKind};
|
||||
use object::{File, Object as _, ObjectSection, SectionKind};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
@@ -18,7 +17,8 @@ use thiserror::Error;
|
||||
use wasmtime_environ::{
|
||||
CompileError, DefinedFuncIndex, FunctionInfo, InstanceSignature, InstanceTypeIndex, Module,
|
||||
ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap, SignatureIndex,
|
||||
StackMapInformation, Tunables, WasmFuncType, ELF_WASMTIME_ADDRMAP, ELF_WASMTIME_TRAPS,
|
||||
StackMapInformation, Trampoline, Tunables, WasmFuncType, ELF_WASMTIME_ADDRMAP,
|
||||
ELF_WASMTIME_TRAPS,
|
||||
};
|
||||
use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline};
|
||||
|
||||
@@ -80,6 +80,10 @@ pub struct CompiledModuleInfo {
|
||||
/// Metadata about each compiled function.
|
||||
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
|
||||
|
||||
/// The trampolines compiled into the text section and their start/length
|
||||
/// relative to the start of the text section.
|
||||
trampolines: Vec<Trampoline>,
|
||||
|
||||
/// General compilation metadata.
|
||||
meta: Metadata,
|
||||
}
|
||||
@@ -124,6 +128,7 @@ pub fn finish_compile(
|
||||
translation: ModuleTranslation<'_>,
|
||||
mut obj: Object,
|
||||
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
|
||||
trampolines: Vec<Trampoline>,
|
||||
tunables: &Tunables,
|
||||
) -> Result<(MmapVec, CompiledModuleInfo)> {
|
||||
let ModuleTranslation {
|
||||
@@ -192,6 +197,7 @@ pub fn finish_compile(
|
||||
let info = CompiledModuleInfo {
|
||||
module,
|
||||
funcs,
|
||||
trampolines,
|
||||
meta: Metadata {
|
||||
native_debug_info_present: tunables.generate_native_debuginfo,
|
||||
has_unparsed_debuginfo,
|
||||
@@ -221,10 +227,6 @@ pub fn finish_compile(
|
||||
}
|
||||
}
|
||||
|
||||
struct FinishedFunctions(PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>);
|
||||
unsafe impl Send for FinishedFunctions {}
|
||||
unsafe impl Sync for FinishedFunctions {}
|
||||
|
||||
/// This is intended to mirror the type tables in `wasmtime_environ`, except that
|
||||
/// it doesn't store the native signatures which are no longer needed past compilation.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@@ -259,10 +261,9 @@ pub struct CompiledModule {
|
||||
mmap: MmapVec,
|
||||
module: Arc<Module>,
|
||||
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
|
||||
trampolines: Vec<Trampoline>,
|
||||
meta: Metadata,
|
||||
code: Arc<ModuleCode>,
|
||||
finished_functions: FinishedFunctions,
|
||||
trampolines: Vec<(SignatureIndex, VMTrampoline)>,
|
||||
}
|
||||
|
||||
impl CompiledModule {
|
||||
@@ -305,27 +306,27 @@ impl CompiledModule {
|
||||
};
|
||||
let module = Arc::new(info.module);
|
||||
let funcs = info.funcs;
|
||||
let trampolines = info.trampolines;
|
||||
let wasm_data = subslice_range(section(ELF_WASM_DATA)?, &mmap);
|
||||
let address_map_data = subslice_range(section(ELF_WASMTIME_ADDRMAP)?, &mmap);
|
||||
let trap_data = subslice_range(section(ELF_WASMTIME_TRAPS)?, &mmap);
|
||||
|
||||
// Allocate all of the compiled functions into executable memory,
|
||||
// copying over their contents.
|
||||
let (code_memory, code_range, finished_functions, trampolines) =
|
||||
build_code_memory(&obj, &module).map_err(|message| {
|
||||
SetupError::Instantiate(InstantiationError::Resource(anyhow::anyhow!(
|
||||
"failed to build code memory for functions: {}",
|
||||
message
|
||||
)))
|
||||
})?;
|
||||
let (code_memory, code_range) = build_code_memory(&obj).map_err(|message| {
|
||||
SetupError::Instantiate(InstantiationError::Resource(anyhow::anyhow!(
|
||||
"failed to build code memory for functions: {}",
|
||||
message
|
||||
)))
|
||||
})?;
|
||||
|
||||
let finished_functions = FinishedFunctions(finished_functions);
|
||||
let start = code_range.0 as usize;
|
||||
let end = start + code_range.1;
|
||||
|
||||
let mut ret = Self {
|
||||
meta: info.meta,
|
||||
funcs,
|
||||
trampolines,
|
||||
module,
|
||||
mmap,
|
||||
wasm_data,
|
||||
@@ -336,8 +337,6 @@ impl CompiledModule {
|
||||
code_memory,
|
||||
dbg_jit_registration: None,
|
||||
}),
|
||||
finished_functions,
|
||||
trampolines,
|
||||
};
|
||||
ret.register_debug_and_profiling(profiler)?;
|
||||
|
||||
@@ -400,6 +399,11 @@ impl CompiledModule {
|
||||
&self.module
|
||||
}
|
||||
|
||||
/// Returns the `FunctionInfo` map for all defined functions.
|
||||
pub fn functions(&self) -> &PrimaryMap<DefinedFuncIndex, FunctionInfo> {
|
||||
&self.funcs
|
||||
}
|
||||
|
||||
/// Return a reference to a mutable module (if possible).
|
||||
pub fn module_mut(&mut self) -> Option<&mut Module> {
|
||||
Arc::get_mut(&mut self.module)
|
||||
@@ -407,13 +411,28 @@ impl CompiledModule {
|
||||
|
||||
/// Returns the map of all finished JIT functions compiled for this module
|
||||
#[inline]
|
||||
pub fn finished_functions(&self) -> &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]> {
|
||||
&self.finished_functions.0
|
||||
pub fn finished_functions(
|
||||
&self,
|
||||
) -> impl ExactSizeIterator<Item = (DefinedFuncIndex, *mut [VMFunctionBody])> + '_ {
|
||||
self.funcs.iter().map(move |(i, info)| {
|
||||
(
|
||||
i,
|
||||
std::ptr::slice_from_raw_parts_mut(
|
||||
(self.code.range.0 + info.start as usize) as *mut VMFunctionBody,
|
||||
info.length as usize,
|
||||
),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the per-signature trampolines for this module.
|
||||
pub fn trampolines(&self) -> &[(SignatureIndex, VMTrampoline)] {
|
||||
&self.trampolines
|
||||
pub fn trampolines(&self) -> impl Iterator<Item = (SignatureIndex, VMTrampoline)> + '_ {
|
||||
self.trampolines.iter().map(move |info| {
|
||||
(info.signature, unsafe {
|
||||
let ptr = self.code.range.0 + info.start as usize;
|
||||
std::mem::transmute::<usize, VMTrampoline>(ptr)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the stack map information for all functions defined in this
|
||||
@@ -425,25 +444,24 @@ impl CompiledModule {
|
||||
&self,
|
||||
) -> impl Iterator<Item = (*mut [VMFunctionBody], &[StackMapInformation])> {
|
||||
self.finished_functions()
|
||||
.values()
|
||||
.copied()
|
||||
.map(|(_, f)| f)
|
||||
.zip(self.funcs.values().map(|f| f.stack_maps.as_slice()))
|
||||
}
|
||||
|
||||
/// Looks up a defined function by a program counter value.
|
||||
///
|
||||
/// Returns the defined function index and the relative address of
|
||||
/// `text_offfset` within the function itself.
|
||||
/// `text_offset` within the function itself.
|
||||
pub fn func_by_text_offset(&self, text_offset: usize) -> Option<(DefinedFuncIndex, u32)> {
|
||||
let functions = self.finished_functions();
|
||||
let text_offset = text_offset as u64;
|
||||
|
||||
let text_section = self.code().range().0;
|
||||
let pc = text_section + text_offset;
|
||||
let index = match functions.binary_search_values_by_key(&pc, |body| unsafe {
|
||||
debug_assert!(!(**body).is_empty());
|
||||
// Return the inclusive "end" of the function
|
||||
(**body).as_ptr() as usize + (**body).len() - 1
|
||||
}) {
|
||||
let index = match self
|
||||
.funcs
|
||||
.binary_search_values_by_key(&text_offset, |info| {
|
||||
debug_assert!(info.length > 0);
|
||||
// Return the inclusive "end" of the function
|
||||
info.start + u64::from(info.length) - 1
|
||||
}) {
|
||||
Ok(k) => {
|
||||
// Exact match, pc is at the end of this function
|
||||
k
|
||||
@@ -456,18 +474,15 @@ impl CompiledModule {
|
||||
}
|
||||
};
|
||||
|
||||
let body = functions.get(index)?;
|
||||
let (start, end) = unsafe {
|
||||
let ptr = (**body).as_ptr();
|
||||
let len = (**body).len();
|
||||
(ptr as usize, ptr as usize + len)
|
||||
};
|
||||
let body = self.funcs.get(index)?;
|
||||
let start = body.start;
|
||||
let end = body.start + u64::from(body.length);
|
||||
|
||||
if pc < start || end < pc {
|
||||
if text_offset < start || end < text_offset {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((index, (text_offset - (start - text_section)) as u32))
|
||||
Some((index, (text_offset - body.start) as u32))
|
||||
}
|
||||
|
||||
/// Gets the function information for a given function index.
|
||||
@@ -539,55 +554,19 @@ impl<'a> SymbolizeContext<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_code_memory(
|
||||
obj: &File,
|
||||
module: &Module,
|
||||
) -> Result<(
|
||||
CodeMemory,
|
||||
(*const u8, usize),
|
||||
PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
|
||||
Vec<(SignatureIndex, VMTrampoline)>,
|
||||
)> {
|
||||
fn build_code_memory(obj: &File) -> Result<(CodeMemory, (*const u8, usize))> {
|
||||
let mut code_memory = CodeMemory::new();
|
||||
|
||||
let allocation = code_memory.allocate_for_object(obj)?;
|
||||
|
||||
// Populate the finished functions from the allocation
|
||||
let mut finished_functions = PrimaryMap::with_capacity(allocation.funcs_len());
|
||||
for (i, fat_ptr) in allocation.funcs() {
|
||||
let start = fat_ptr.as_ptr() as usize;
|
||||
let fat_ptr: *mut [VMFunctionBody] = fat_ptr;
|
||||
// Assert that the function bodies are pushed in sort order
|
||||
// This property is relied upon to search for functions by PC values
|
||||
assert!(
|
||||
start
|
||||
> finished_functions
|
||||
.last()
|
||||
.map(|f: &*mut [VMFunctionBody]| unsafe { (**f).as_ptr() as usize })
|
||||
.unwrap_or(0)
|
||||
);
|
||||
assert_eq!(
|
||||
Some(finished_functions.push(fat_ptr)),
|
||||
module.defined_func_index(i)
|
||||
);
|
||||
}
|
||||
link_module(obj, allocation);
|
||||
|
||||
// Populate the trampolines from the allocation
|
||||
let mut trampolines = Vec::with_capacity(allocation.trampolines_len());
|
||||
for (i, fat_ptr) in allocation.trampolines() {
|
||||
let fnptr =
|
||||
unsafe { std::mem::transmute::<*const VMFunctionBody, VMTrampoline>(fat_ptr.as_ptr()) };
|
||||
trampolines.push((i, fnptr));
|
||||
}
|
||||
|
||||
link_module(obj, allocation.code_range);
|
||||
|
||||
let code_range = (allocation.code_range.as_ptr(), allocation.code_range.len());
|
||||
let code_range = (allocation.as_ptr(), allocation.len());
|
||||
|
||||
// Make all code compiled thus far executable.
|
||||
code_memory.publish();
|
||||
|
||||
Ok((code_memory, code_range, finished_functions, trampolines))
|
||||
Ok((code_memory, code_range))
|
||||
}
|
||||
|
||||
/// Returns the range of `inner` within `outer`, such that `outer[range]` is the
|
||||
|
||||
@@ -290,7 +290,7 @@ impl State {
|
||||
let tid = pid; // ThreadId does appear to track underlying thread. Using PID.
|
||||
|
||||
for (idx, func) in module.finished_functions() {
|
||||
let (addr, len) = unsafe { ((**func).as_ptr() as *const u8, (**func).len()) };
|
||||
let (addr, len) = unsafe { ((*func).as_ptr() as *const u8, (*func).len()) };
|
||||
if let Some(img) = &dbg_image {
|
||||
if let Err(err) = self.dump_from_debug_image(img, "wasm", addr, len, pid, tid) {
|
||||
println!(
|
||||
|
||||
@@ -121,7 +121,7 @@ impl State {
|
||||
let global_module_id = MODULE_ID.fetch_add(1, atomic::Ordering::SeqCst);
|
||||
|
||||
for (idx, func) in module.finished_functions() {
|
||||
let (addr, len) = unsafe { ((**func).as_ptr() as *const u8, (**func).len()) };
|
||||
let (addr, len) = unsafe { ((*func).as_ptr() as *const u8, (*func).len()) };
|
||||
let default_filename = "wasm_file";
|
||||
let default_module_name = String::from("wasm_module");
|
||||
let module_name = module
|
||||
|
||||
Reference in New Issue
Block a user