Files
wasmtime/crates/jit/src/instantiate.rs
Alex Crichton 925b771d2d Remove some dead code from wasmtime-jit (#3225)
Looks like nothing is actually using these methods, so let's remove
them.
2021-08-23 14:35:39 -05:00

500 lines
17 KiB
Rust

//! Define the `instantiate` function, which takes a byte array containing an
//! encoded wasm module and returns a live wasm instance. Also, define
//! `CompiledModule` to allow compiling and instantiating to be done as separate
//! steps.
use crate::code_memory::CodeMemory;
use crate::debug::create_gdbjit_image;
use crate::link::link_module;
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::ops::Range;
use std::sync::Arc;
use thiserror::Error;
use wasmtime_environ::{
CompileError, DebugInfoData, DefinedFuncIndex, FunctionInfo, InstanceSignature,
InstanceTypeIndex, Module, ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap,
SignatureIndex, StackMapInformation, Tunables, WasmFuncType,
};
use wasmtime_profiling::ProfilingAgent;
use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline};
/// An error condition while setting up a wasm instance, be it validation,
/// compilation, or instantiation.
#[derive(Error, Debug)]
pub enum SetupError {
/// The module did not pass validation.
#[error("Validation error: {0}")]
Validate(String),
/// A wasm translation error occurred.
#[error("WebAssembly failed to compile")]
Compile(#[from] CompileError),
/// Some runtime resource was unavailable or insufficient, or the start function
/// trapped.
#[error("Instantiation failed during setup")]
Instantiate(#[from] InstantiationError),
/// Debug information generation error occurred.
#[error("Debug information error")]
DebugInfo(#[from] anyhow::Error),
}
/// Contains all compilation artifacts.
#[derive(Serialize, Deserialize)]
pub struct CompilationArtifacts {
/// Module metadata.
#[serde(with = "arc_serde")]
module: Arc<Module>,
/// ELF image with functions code.
obj: Box<[u8]>,
/// Descriptions of compiled functions
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
/// Whether or not native debug information is available in `obj`
native_debug_info_present: bool,
/// Whether or not the original wasm module contained debug information that
/// we skipped and did not parse.
has_unparsed_debuginfo: bool,
/// Debug information found in the wasm file, used for symbolicating
/// backtraces.
debug_info: Option<DebugInfo>,
}
#[derive(Serialize, Deserialize)]
struct DebugInfo {
data: Box<[u8]>,
code_section_offset: u64,
debug_abbrev: Range<usize>,
debug_addr: Range<usize>,
debug_aranges: Range<usize>,
debug_info: Range<usize>,
debug_line: Range<usize>,
debug_line_str: Range<usize>,
debug_ranges: Range<usize>,
debug_rnglists: Range<usize>,
debug_str: Range<usize>,
debug_str_offsets: Range<usize>,
}
impl CompilationArtifacts {
/// Creates a new `CompilationArtifacts` from the final results of
/// compilation.
pub fn new(
translation: ModuleTranslation<'_>,
obj: Vec<u8>,
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
tunables: &Tunables,
) -> CompilationArtifacts {
let ModuleTranslation {
module,
debuginfo,
has_unparsed_debuginfo,
..
} = translation;
CompilationArtifacts {
module: Arc::new(module),
obj: obj.into_boxed_slice(),
funcs,
native_debug_info_present: tunables.generate_native_debuginfo,
debug_info: if tunables.parse_wasm_debuginfo {
Some(debuginfo.into())
} else {
None
},
has_unparsed_debuginfo,
}
}
}
struct FinishedFunctions(PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>);
unsafe impl Send for FinishedFunctions {}
unsafe impl Sync for FinishedFunctions {}
/// This is intended to mirror the type tables in `wasmtime_environ`, except that
/// it doesn't store the native signatures which are no longer needed past compilation.
#[derive(Serialize, Deserialize)]
#[allow(missing_docs)]
pub struct TypeTables {
pub wasm_signatures: PrimaryMap<SignatureIndex, WasmFuncType>,
pub module_signatures: PrimaryMap<ModuleTypeIndex, ModuleSignature>,
pub instance_signatures: PrimaryMap<InstanceTypeIndex, InstanceSignature>,
}
/// Container for data needed for an Instance function to exist.
pub struct ModuleCode {
range: (usize, usize),
#[allow(dead_code)]
code_memory: CodeMemory,
#[allow(dead_code)]
dbg_jit_registration: Option<GdbJitImageRegistration>,
}
impl ModuleCode {
/// Gets the [begin, end) range of the module's code.
pub fn range(&self) -> (usize, usize) {
self.range
}
}
/// A compiled wasm module, ready to be instantiated.
pub struct CompiledModule {
artifacts: CompilationArtifacts,
code: Arc<ModuleCode>,
finished_functions: FinishedFunctions,
trampolines: Vec<(SignatureIndex, VMTrampoline)>,
}
impl CompiledModule {
/// Creates `CompiledModule` directly from `CompilationArtifacts`.
pub fn from_artifacts(
artifacts: CompilationArtifacts,
profiler: &dyn ProfilingAgent,
) -> Result<Arc<Self>, SetupError> {
// Allocate all of the compiled functions into executable memory,
// copying over their contents.
let (code_memory, code_range, finished_functions, trampolines) =
build_code_memory(&artifacts.obj, &artifacts.module).map_err(|message| {
SetupError::Instantiate(InstantiationError::Resource(anyhow::anyhow!(
"failed to build code memory for functions: {}",
message
)))
})?;
// Register GDB JIT images; initialize profiler and load the wasm module.
let dbg_jit_registration = if artifacts.native_debug_info_present {
let bytes = create_dbg_image(
artifacts.obj.to_vec(),
code_range,
&artifacts.module,
&finished_functions,
)?;
profiler.module_load(&artifacts.module, &finished_functions, Some(&bytes));
let reg = GdbJitImageRegistration::register(bytes);
Some(reg)
} else {
profiler.module_load(&artifacts.module, &finished_functions, None);
None
};
let finished_functions = FinishedFunctions(finished_functions);
let start = code_range.0 as usize;
let end = start + code_range.1;
Ok(Arc::new(Self {
artifacts,
code: Arc::new(ModuleCode {
range: (start, end),
code_memory,
dbg_jit_registration,
}),
finished_functions,
trampolines,
}))
}
/// Extracts `CompilationArtifacts` from the compiled module.
pub fn compilation_artifacts(&self) -> &CompilationArtifacts {
&self.artifacts
}
/// Return a reference-counting pointer to a module.
pub fn module(&self) -> &Arc<Module> {
&self.artifacts.module
}
/// Return a reference to a mutable module (if possible).
pub fn module_mut(&mut self) -> Option<&mut Module> {
Arc::get_mut(&mut self.artifacts.module)
}
/// Returns the map of all finished JIT functions compiled for this module
#[inline]
pub fn finished_functions(&self) -> &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]> {
&self.finished_functions.0
}
/// Returns the per-signature trampolines for this module.
pub fn trampolines(&self) -> &[(SignatureIndex, VMTrampoline)] {
&self.trampolines
}
/// Returns the stack map information for all functions defined in this
/// module.
///
/// The iterator returned iterates over the span of the compiled function in
/// memory with the stack maps associated with those bytes.
pub fn stack_maps(
&self,
) -> impl Iterator<Item = (*mut [VMFunctionBody], &[StackMapInformation])> {
self.finished_functions().values().copied().zip(
self.artifacts
.funcs
.values()
.map(|f| f.stack_maps.as_slice()),
)
}
/// Lookups a defined function by a program counter value.
///
/// Returns the defined function index, the start address, and the end address (exclusive).
pub fn func_by_pc(&self, pc: usize) -> Option<(DefinedFuncIndex, usize, usize)> {
let functions = self.finished_functions();
let index = match functions.binary_search_values_by_key(&pc, |body| unsafe {
debug_assert!(!(**body).is_empty());
// Return the inclusive "end" of the function
(**body).as_ptr() as usize + (**body).len() - 1
}) {
Ok(k) => {
// Exact match, pc is at the end of this function
k
}
Err(k) => {
// Not an exact match, k is where `pc` would be "inserted"
// Since we key based on the end, function `k` might contain `pc`,
// so we'll validate on the range check below
k
}
};
let body = functions.get(index)?;
let (start, end) = unsafe {
let ptr = (**body).as_ptr();
let len = (**body).len();
(ptr as usize, ptr as usize + len)
};
if pc < start || end < pc {
return None;
}
Some((index, start, end))
}
/// Gets the function information for a given function index.
pub fn func_info(&self, index: DefinedFuncIndex) -> &FunctionInfo {
self.artifacts
.funcs
.get(index)
.expect("defined function should be present")
}
/// Returns module's JIT code.
pub fn code(&self) -> &Arc<ModuleCode> {
&self.code
}
/// Creates a new symbolication context which can be used to further
/// symbolicate stack traces.
///
/// Basically this makes a thing which parses debuginfo and can tell you
/// what filename and line number a wasm pc comes from.
pub fn symbolize_context(&self) -> Result<Option<SymbolizeContext>, gimli::Error> {
use gimli::EndianSlice;
let info = match &self.artifacts.debug_info {
Some(info) => info,
None => return Ok(None),
};
// For now we clone the data into the `SymbolizeContext`, but if this
// becomes prohibitive we could always `Arc` it with our own allocation
// here.
let data = info.data.clone();
let endian = gimli::LittleEndian;
let cx = addr2line::Context::from_sections(
EndianSlice::new(&data[info.debug_abbrev.clone()], endian).into(),
EndianSlice::new(&data[info.debug_addr.clone()], endian).into(),
EndianSlice::new(&data[info.debug_aranges.clone()], endian).into(),
EndianSlice::new(&data[info.debug_info.clone()], endian).into(),
EndianSlice::new(&data[info.debug_line.clone()], endian).into(),
EndianSlice::new(&data[info.debug_line_str.clone()], endian).into(),
EndianSlice::new(&data[info.debug_ranges.clone()], endian).into(),
EndianSlice::new(&data[info.debug_rnglists.clone()], endian).into(),
EndianSlice::new(&data[info.debug_str.clone()], endian).into(),
EndianSlice::new(&data[info.debug_str_offsets.clone()], endian).into(),
EndianSlice::new(&[], endian),
)?;
Ok(Some(SymbolizeContext {
// See comments on `SymbolizeContext` for why we do this static
// lifetime promotion.
inner: unsafe {
std::mem::transmute::<Addr2LineContext<'_>, Addr2LineContext<'static>>(cx)
},
code_section_offset: info.code_section_offset,
_data: data,
}))
}
/// Returns whether the original wasm module had unparsed debug information
/// based on the tunables configuration.
pub fn has_unparsed_debuginfo(&self) -> bool {
self.artifacts.has_unparsed_debuginfo
}
}
type Addr2LineContext<'a> = addr2line::Context<gimli::EndianSlice<'a, gimli::LittleEndian>>;
/// A context which contains dwarf debug information to translate program
/// counters back to filenames and line numbers.
pub struct SymbolizeContext {
// Note the `'static` lifetime on `inner`. That's actually a bunch of slices
// which point back into the `_data` field. We currently unsafely manage
// this by saying that when inside the struct it's `'static` (since we own
// the referenced data just next to it) and we only loan out borrowed
// references.
_data: Box<[u8]>,
inner: Addr2LineContext<'static>,
code_section_offset: u64,
}
impl SymbolizeContext {
/// Returns access to the [`addr2line::Context`] which can be used to query
/// frame information with.
pub fn addr2line(&self) -> &Addr2LineContext<'_> {
// Here we demote our synthetic `'static` lifetime which doesn't
// actually exist back to a lifetime that's tied to `&self`, which
// should be safe.
unsafe {
std::mem::transmute::<&Addr2LineContext<'static>, &Addr2LineContext<'_>>(&self.inner)
}
}
/// Returns the offset of the code section in the original wasm file, used
/// to calculate lookup values into the DWARF.
pub fn code_section_offset(&self) -> u64 {
self.code_section_offset
}
}
fn create_dbg_image(
obj: Vec<u8>,
code_range: (*const u8, usize),
module: &Module,
finished_functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
) -> Result<Vec<u8>, SetupError> {
let funcs = finished_functions
.values()
.map(|allocated: &*mut [VMFunctionBody]| (*allocated) as *const u8)
.collect::<Vec<_>>();
create_gdbjit_image(obj, code_range, module.num_imported_funcs, &funcs)
.map_err(SetupError::DebugInfo)
}
fn build_code_memory(
obj: &[u8],
module: &Module,
) -> Result<(
CodeMemory,
(*const u8, usize),
PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
Vec<(SignatureIndex, VMTrampoline)>,
)> {
let mut code_memory = CodeMemory::new();
let allocation = code_memory.allocate_for_object(obj)?;
// Populate the finished functions from the allocation
let mut finished_functions = PrimaryMap::with_capacity(allocation.funcs_len());
for (i, fat_ptr) in allocation.funcs() {
let start = fat_ptr.as_ptr() as usize;
let fat_ptr: *mut [VMFunctionBody] = fat_ptr;
// Assert that the function bodies are pushed in sort order
// This property is relied upon to search for functions by PC values
assert!(
start
> finished_functions
.last()
.map(|f: &*mut [VMFunctionBody]| unsafe { (**f).as_ptr() as usize })
.unwrap_or(0)
);
assert_eq!(
Some(finished_functions.push(fat_ptr)),
module.defined_func_index(i)
);
}
// Populate the trampolines from the allocation
let mut trampolines = Vec::with_capacity(allocation.trampolines_len());
for (i, fat_ptr) in allocation.trampolines() {
let fnptr =
unsafe { std::mem::transmute::<*const VMFunctionBody, VMTrampoline>(fat_ptr.as_ptr()) };
trampolines.push((i, fnptr));
}
link_module(
&allocation.obj,
&module,
allocation.code_range,
&finished_functions,
);
let code_range = (allocation.code_range.as_ptr(), allocation.code_range.len());
// Make all code compiled thus far executable.
code_memory.publish();
Ok((code_memory, code_range, finished_functions, trampolines))
}
impl From<DebugInfoData<'_>> for DebugInfo {
fn from(raw: DebugInfoData<'_>) -> DebugInfo {
use gimli::Section;
let mut data = Vec::new();
let mut push = |section: &[u8]| {
data.extend_from_slice(section);
data.len() - section.len()..data.len()
};
let debug_abbrev = push(raw.dwarf.debug_abbrev.reader().slice());
let debug_addr = push(raw.dwarf.debug_addr.reader().slice());
let debug_aranges = push(raw.dwarf.debug_aranges.reader().slice());
let debug_info = push(raw.dwarf.debug_info.reader().slice());
let debug_line = push(raw.dwarf.debug_line.reader().slice());
let debug_line_str = push(raw.dwarf.debug_line_str.reader().slice());
let debug_ranges = push(raw.debug_ranges.reader().slice());
let debug_rnglists = push(raw.debug_rnglists.reader().slice());
let debug_str = push(raw.dwarf.debug_str.reader().slice());
let debug_str_offsets = push(raw.dwarf.debug_str_offsets.reader().slice());
DebugInfo {
data: data.into(),
debug_abbrev,
debug_addr,
debug_aranges,
debug_info,
debug_line,
debug_line_str,
debug_ranges,
debug_rnglists,
debug_str,
debug_str_offsets,
code_section_offset: raw.wasm_file.code_section_offset,
}
}
}
mod arc_serde {
use super::Arc;
use serde::{de::Deserialize, ser::Serialize, Deserializer, Serializer};
pub(super) fn serialize<S, T>(arc: &Arc<T>, ser: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
T: Serialize,
{
(**arc).serialize(ser)
}
pub(super) fn deserialize<'de, D, T>(de: D) -> Result<Arc<T>, D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de>,
{
Ok(Arc::new(T::deserialize(de)?))
}
}