Implement lowered-then-lifted functions (#4327)
* Implement lowered-then-lifted functions

  This commit is a few features bundled into one, culminating in the implementation of lowered-then-lifted functions for the component model. It's probably not going to be used all that often, but this is possible within a valid component, so Wasmtime needs to do something relatively reasonable. The main things implemented in this commit are:

  * Component instances are now assigned a `RuntimeComponentInstanceIndex` to differentiate each one. This will be used in the future to detect fusion (one instance lowering a function from another instance). For now it's used to allocate separate `VMComponentFlags` for each internal component instance.

  * The `CoreExport<FuncIndex>` of lowered functions was changed to a `CoreDef`, since technically a lowered function can use another lowered function as the callee. This ended up being not too difficult to plumb through, as everything else was already in place.

  * A need arose to compile host-to-wasm trampolines which weren't already present. Currently wasm in a component is always entered through a host-to-wasm trampoline, but core wasm modules are the source of all the trampolines. In the case of a lowered-then-lifted function there may not actually be any core wasm modules, so component objects now contain the necessary trampolines not otherwise provided by the core wasm objects. This feature required splitting out a new function in the `Compiler` trait for creating a host-to-wasm trampoline. After doing this, core wasm compilation was also updated to leverage it, which further enabled compiling trampolines in parallel as opposed to the previous synchronous compilation.

* Review comments
This commit is contained in:
@@ -2,13 +2,14 @@ use crate::signatures::SignatureCollection;
|
||||
use crate::{Engine, Module};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::fs;
|
||||
use std::ops::Range;
|
||||
use std::path::Path;
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::Arc;
|
||||
use wasmtime_environ::component::{
|
||||
ComponentTypes, GlobalInitializer, LoweredIndex, StaticModuleIndex, TrampolineInfo, Translator,
|
||||
ComponentTypes, GlobalInitializer, LoweredIndex, LoweringInfo, StaticModuleIndex, Translator,
|
||||
};
|
||||
use wasmtime_environ::PrimaryMap;
|
||||
use wasmtime_jit::CodeMemory;
|
||||
@@ -52,7 +53,7 @@ struct ComponentInner {
|
||||
|
||||
/// Where trampolines are located within the `text` section of
|
||||
/// `trampoline_obj`.
|
||||
trampolines: PrimaryMap<LoweredIndex, TrampolineInfo>,
|
||||
trampolines: PrimaryMap<LoweredIndex, LoweringInfo>,
|
||||
}
|
||||
|
||||
impl Component {
|
||||
@@ -116,6 +117,40 @@ impl Component {
|
||||
.context("failed to parse WebAssembly module")?;
|
||||
let types = Arc::new(types.finish());
|
||||
|
||||
// All lowered functions will require a trampoline to be available in
|
||||
// case they're used when entering wasm. For example a lowered function
|
||||
// could be immediately lifted in which case we'll need a trampoline to
|
||||
// call that lowered function.
|
||||
//
|
||||
// Most of the time trampolines can come from the core wasm modules
|
||||
// since lifted functions come from core wasm. For these esoteric cases
|
||||
// though we may have to compile trampolines specifically into the
|
||||
// component object as well in case core wasm doesn't provide the
|
||||
// necessary trampoline.
|
||||
let lowerings = component
|
||||
.initializers
|
||||
.iter()
|
||||
.filter_map(|init| match init {
|
||||
GlobalInitializer::LowerImport(i) => Some(i),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let required_trampolines = lowerings
|
||||
.iter()
|
||||
.map(|l| l.canonical_abi)
|
||||
.collect::<HashSet<_>>();
|
||||
let provided_trampolines = modules
|
||||
.iter()
|
||||
.flat_map(|(_, m)| m.exported_signatures.iter().copied())
|
||||
.collect::<HashSet<_>>();
|
||||
let mut trampolines_to_compile = required_trampolines
|
||||
.difference(&provided_trampolines)
|
||||
.collect::<Vec<_>>();
|
||||
// Ensure a deterministically compiled artifact by sorting this list
|
||||
// which was otherwise created with nondeterministically ordered hash
|
||||
// tables.
|
||||
trampolines_to_compile.sort();
|
||||
|
||||
let (static_modules, trampolines) = engine.join_maybe_parallel(
|
||||
// In one (possibly) parallel task all the modules found within this
|
||||
// component are compiled. Note that this will further parallelize
|
||||
@@ -139,28 +174,40 @@ impl Component {
|
||||
// In another (possibly) parallel task we compile lowering
|
||||
// trampolines that are necessary for the lowerings found in the component.
|
||||
|| -> Result<_> {
|
||||
let lowerings = component
|
||||
.initializers
|
||||
.iter()
|
||||
.filter_map(|init| match init {
|
||||
GlobalInitializer::LowerImport(i) => Some(i),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let compiler = engine.compiler().component_compiler();
|
||||
let trampolines = engine
|
||||
.run_maybe_parallel(lowerings, |lowering| {
|
||||
compiler.compile_lowered_trampoline(&component, lowering, &types)
|
||||
})?
|
||||
.into_iter()
|
||||
.collect();
|
||||
let compiler = engine.compiler();
|
||||
let (lowered_trampolines, core_trampolines) = engine.join_maybe_parallel(
|
||||
// Compile all the lowered trampolines here which implement
|
||||
// `canon lower` and are used to exit wasm into the host.
|
||||
|| -> Result<_> {
|
||||
Ok(engine
|
||||
.run_maybe_parallel(lowerings, |lowering| {
|
||||
compiler
|
||||
.component_compiler()
|
||||
.compile_lowered_trampoline(&component, lowering, &types)
|
||||
})?
|
||||
.into_iter()
|
||||
.collect())
|
||||
},
|
||||
// Compile all entry host-to-wasm trampolines here that
|
||||
// aren't otherwise provided by core wasm modules.
|
||||
|| -> Result<_> {
|
||||
engine.run_maybe_parallel(trampolines_to_compile.clone(), |i| {
|
||||
let ty = &types[*i];
|
||||
Ok((*i, compiler.compile_host_to_wasm_trampoline(ty)?))
|
||||
})
|
||||
},
|
||||
);
|
||||
let mut obj = engine.compiler().object()?;
|
||||
let trampolines = compiler.emit_obj(trampolines, &mut obj)?;
|
||||
let trampolines = compiler.component_compiler().emit_obj(
|
||||
lowered_trampolines?,
|
||||
core_trampolines?,
|
||||
&mut obj,
|
||||
)?;
|
||||
Ok((trampolines, wasmtime_jit::mmap_vec_from_obj(obj)?))
|
||||
},
|
||||
);
|
||||
let static_modules = static_modules?;
|
||||
let (trampolines, trampoline_obj) = trampolines?;
|
||||
let ((lowering_trampolines, core_trampolines), trampoline_obj) = trampolines?;
|
||||
let mut trampoline_obj = CodeMemory::new(trampoline_obj);
|
||||
let code = trampoline_obj.publish()?;
|
||||
let text = wasmtime_jit::subslice_range(code.text, code.mmap);
|
||||
@@ -184,6 +231,13 @@ impl Component {
|
||||
vmtrampolines.insert(idx, trampoline);
|
||||
}
|
||||
}
|
||||
for (signature, trampoline) in trampolines_to_compile.iter().zip(core_trampolines) {
|
||||
vmtrampolines.insert(**signature, unsafe {
|
||||
let ptr =
|
||||
code.text[trampoline.start as usize..][..trampoline.length as usize].as_ptr();
|
||||
std::mem::transmute::<*const u8, wasmtime_runtime::VMTrampoline>(ptr)
|
||||
});
|
||||
}
|
||||
|
||||
// FIXME: for the same reason as above where each module is
|
||||
// re-registering everything this should only be registered once. This
|
||||
@@ -202,7 +256,7 @@ impl Component {
|
||||
signatures,
|
||||
trampoline_obj,
|
||||
text,
|
||||
trampolines,
|
||||
trampolines: lowering_trampolines,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -5,8 +5,9 @@ use anyhow::{Context, Result};
|
||||
use std::mem::MaybeUninit;
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::Arc;
|
||||
use wasmtime_environ::component::{CanonicalOptions, ComponentTypes, CoreExport, TypeFuncIndex};
|
||||
use wasmtime_environ::FuncIndex;
|
||||
use wasmtime_environ::component::{
|
||||
CanonicalOptions, ComponentTypes, CoreDef, RuntimeComponentInstanceIndex, TypeFuncIndex,
|
||||
};
|
||||
use wasmtime_runtime::{Export, ExportFunction, VMTrampoline};
|
||||
|
||||
const MAX_STACK_PARAMS: usize = 16;
|
||||
@@ -82,6 +83,7 @@ pub struct FuncData {
|
||||
types: Arc<ComponentTypes>,
|
||||
options: Options,
|
||||
instance: Instance,
|
||||
component_instance: RuntimeComponentInstanceIndex,
|
||||
post_return: Option<(ExportFunction, VMTrampoline)>,
|
||||
post_return_arg: Option<ValRaw>,
|
||||
}
|
||||
@@ -92,10 +94,10 @@ impl Func {
|
||||
instance: &Instance,
|
||||
data: &InstanceData,
|
||||
ty: TypeFuncIndex,
|
||||
func: &CoreExport<FuncIndex>,
|
||||
func: &CoreDef,
|
||||
options: &CanonicalOptions,
|
||||
) -> Func {
|
||||
let export = match data.lookup_export(store, func) {
|
||||
let export = match data.lookup_def(store, func) {
|
||||
Export::Function(f) => f,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
@@ -109,6 +111,7 @@ impl Func {
|
||||
let trampoline = store.lookup_trampoline(unsafe { anyfunc.as_ref() });
|
||||
(ExportFunction { anyfunc }, trampoline)
|
||||
});
|
||||
let component_instance = options.instance;
|
||||
let options = unsafe { Options::new(store.id(), memory, realloc, options.string_encoding) };
|
||||
Func(store.store_data_mut().insert(FuncData {
|
||||
trampoline,
|
||||
@@ -117,6 +120,7 @@ impl Func {
|
||||
ty,
|
||||
types: data.component_types().clone(),
|
||||
instance: *instance,
|
||||
component_instance,
|
||||
post_return,
|
||||
post_return_arg: None,
|
||||
}))
|
||||
|
||||
@@ -27,6 +27,7 @@ pub trait IntoComponentFunc<T, Params, Return> {
|
||||
extern "C" fn entrypoint(
|
||||
cx: *mut VMOpaqueContext,
|
||||
data: *mut u8,
|
||||
flags: *mut VMComponentFlags,
|
||||
memory: *mut VMMemoryDefinition,
|
||||
realloc: *mut VMCallerCheckedAnyfunc,
|
||||
string_encoding: StringEncoding,
|
||||
@@ -105,6 +106,7 @@ where
|
||||
/// the select few places it's intended to be called from.
|
||||
unsafe fn call_host<T, Params, Return, F>(
|
||||
cx: *mut VMOpaqueContext,
|
||||
flags: *mut VMComponentFlags,
|
||||
memory: *mut VMMemoryDefinition,
|
||||
realloc: *mut VMCallerCheckedAnyfunc,
|
||||
string_encoding: StringEncoding,
|
||||
@@ -136,7 +138,6 @@ where
|
||||
|
||||
let cx = VMComponentContext::from_opaque(cx);
|
||||
let instance = (*cx).instance();
|
||||
let flags = (*instance).flags();
|
||||
let mut cx = StoreContextMut::from_raw((*instance).store());
|
||||
|
||||
let options = Options::new(
|
||||
@@ -282,6 +283,7 @@ macro_rules! impl_into_component_func {
|
||||
extern "C" fn entrypoint(
|
||||
cx: *mut VMOpaqueContext,
|
||||
data: *mut u8,
|
||||
flags: *mut VMComponentFlags,
|
||||
memory: *mut VMMemoryDefinition,
|
||||
realloc: *mut VMCallerCheckedAnyfunc,
|
||||
string_encoding: StringEncoding,
|
||||
@@ -292,6 +294,7 @@ macro_rules! impl_into_component_func {
|
||||
unsafe {
|
||||
handle_result(|| call_host::<T, _, _, _>(
|
||||
cx,
|
||||
flags,
|
||||
memory,
|
||||
realloc,
|
||||
string_encoding,
|
||||
@@ -318,6 +321,7 @@ macro_rules! impl_into_component_func {
|
||||
extern "C" fn entrypoint(
|
||||
cx: *mut VMOpaqueContext,
|
||||
data: *mut u8,
|
||||
flags: *mut VMComponentFlags,
|
||||
memory: *mut VMMemoryDefinition,
|
||||
realloc: *mut VMCallerCheckedAnyfunc,
|
||||
string_encoding: StringEncoding,
|
||||
@@ -328,6 +332,7 @@ macro_rules! impl_into_component_func {
|
||||
unsafe {
|
||||
handle_result(|| call_host::<T, _, _, _>(
|
||||
cx,
|
||||
flags,
|
||||
memory,
|
||||
realloc,
|
||||
string_encoding,
|
||||
|
||||
@@ -308,6 +308,7 @@ where
|
||||
export,
|
||||
options,
|
||||
instance,
|
||||
component_instance,
|
||||
..
|
||||
} = store.0[self.func.0];
|
||||
|
||||
@@ -329,7 +330,7 @@ where
|
||||
assert!(mem::align_of_val(map_maybe_uninit!(space.ret)) == val_align);
|
||||
|
||||
let instance = store.0[instance.0].as_ref().unwrap().instance();
|
||||
let flags = instance.flags();
|
||||
let flags = instance.flags(component_instance);
|
||||
|
||||
unsafe {
|
||||
if !(*flags).may_enter() {
|
||||
@@ -448,9 +449,10 @@ where
|
||||
let data = &mut store.0[self.func.0];
|
||||
let instance = data.instance;
|
||||
let post_return = data.post_return;
|
||||
let component_instance = data.component_instance;
|
||||
let post_return_arg = data.post_return_arg.take();
|
||||
let instance = store.0[instance.0].as_ref().unwrap().instance();
|
||||
let flags = instance.flags();
|
||||
let flags = instance.flags(component_instance);
|
||||
|
||||
unsafe {
|
||||
// First assert that the instance is in a "needs post return" state.
|
||||
|
||||
@@ -143,7 +143,7 @@ impl InstanceData {
|
||||
}
|
||||
}
|
||||
|
||||
fn lookup_def(&self, store: &mut StoreOpaque, def: &CoreDef) -> wasmtime_runtime::Export {
|
||||
pub fn lookup_def(&self, store: &mut StoreOpaque, def: &CoreDef) -> wasmtime_runtime::Export {
|
||||
match def {
|
||||
CoreDef::Lowered(idx) => {
|
||||
wasmtime_runtime::Export::Function(wasmtime_runtime::ExportFunction {
|
||||
|
||||
@@ -367,18 +367,20 @@ impl Module {
|
||||
mut translation: ModuleTranslation<'_>,
|
||||
types: &ModuleTypes,
|
||||
) -> Result<(MmapVec, Option<CompiledModuleInfo>)> {
|
||||
// Compile all functions in parallel using rayon. This will also perform
|
||||
// validation of function bodies.
|
||||
let tunables = &engine.config().tunables;
|
||||
let functions = mem::take(&mut translation.function_body_inputs);
|
||||
let functions = functions.into_iter().collect::<Vec<_>>();
|
||||
let funcs = engine
|
||||
.run_maybe_parallel(functions, |(index, func)| {
|
||||
let offset = func.body.range().start;
|
||||
engine
|
||||
.compiler()
|
||||
.compile_function(&translation, index, func, tunables, types)
|
||||
.with_context(|| {
|
||||
let compiler = engine.compiler();
|
||||
let (funcs, trampolines) = engine.join_maybe_parallel(
|
||||
// In one (possibly) parallel task all wasm functions are compiled
|
||||
// in parallel. Note that this is also where the actual validation
|
||||
// of all function bodies happens as well.
|
||||
|| -> Result<_> {
|
||||
let funcs = engine.run_maybe_parallel(functions, |(index, func)| {
|
||||
let offset = func.body.range().start;
|
||||
let result =
|
||||
compiler.compile_function(&translation, index, func, tunables, types);
|
||||
result.with_context(|| {
|
||||
let index = translation.module.func_index(index);
|
||||
let name = match translation.debuginfo.name_section.func_names.get(&index) {
|
||||
Some(name) => format!(" (`{}`)", name),
|
||||
@@ -389,16 +391,28 @@ impl Module {
|
||||
"failed to compile wasm function {index}{name} at offset {offset:#x}"
|
||||
)
|
||||
})
|
||||
})?
|
||||
.into_iter()
|
||||
.collect();
|
||||
})?;
|
||||
|
||||
Ok(funcs.into_iter().collect())
|
||||
},
|
||||
// In another (possibly) parallel task all trampolines necessary
|
||||
// for untyped host-to-wasm entry are compiled. Note that this
|
||||
// isn't really expected to take all that long, it's moreso "well
|
||||
// if we're using rayon why not use it here too".
|
||||
|| -> Result<_> {
|
||||
engine.run_maybe_parallel(translation.exported_signatures.clone(), |sig| {
|
||||
let ty = &types[sig];
|
||||
Ok(compiler.compile_host_to_wasm_trampoline(ty)?)
|
||||
})
|
||||
},
|
||||
);
|
||||
|
||||
// Collect all the function results into a final ELF object.
|
||||
let mut obj = engine.compiler().object()?;
|
||||
let (funcs, trampolines) =
|
||||
engine
|
||||
.compiler()
|
||||
.emit_obj(&translation, types, funcs, tunables, &mut obj)?;
|
||||
.emit_obj(&translation, funcs?, trampolines?, tunables, &mut obj)?;
|
||||
|
||||
// If configured attempt to use static memory initialization which
|
||||
// can either at runtime be implemented as a single memcpy to
|
||||
|
||||
Reference in New Issue
Block a user