Add a pooling allocator mode based on copy-on-write mappings of memfds.

As first suggested by Jan on the Zulip here [1], a cheap and effective
way to obtain copy-on-write semantics of a "backing image" for a Wasm
memory is to mmap a file with `MAP_PRIVATE`. The `memfd` mechanism
provided by the Linux kernel allows us to create anonymous,
in-memory-only files that we can use for this mapping, so we can
construct the image contents on-the-fly then effectively create a CoW
overlay. Furthermore, and importantly, `madvise(MADV_DONTNEED, ...)`
will discard the CoW overlay, returning the mapping to its original
state.

By itself this is almost enough for a very fast
instantiation-termination loop of the same image over and over,
without changing the address space mapping at all (which is
expensive). The only missing bit is how to implement
heap *growth*. But here memfds can help us again: if we create another
anonymous file and map it where the extended parts of the heap would
go, we can take advantage of the fact that a `mmap()` mapping can
be *larger than the file itself*, with accesses beyond the end
generating a `SIGBUS`, and the fact that we can cheaply resize the
file with `ftruncate`, even after a mapping exists. So we can map the
"heap extension" file once with the maximum memory-slot size and grow
the memfd itself as `memory.grow` operations occur.

The above CoW technique and heap-growth technique together give us a
fastpath that consists only of `madvise()` and `ftruncate()` calls when
we re-instantiate the same module over and over, as long as we can reuse
the same slot. This fastpath avoids all whole-process address-space locks in
the Linux kernel, which should mean it is highly scalable. It also
avoids the cost of copying data on read, as the `uffd` heap backend
does when servicing pagefaults; the kernel's own optimized CoW
logic (same as used by all file mmaps) is used instead.

[1] https://bytecodealliance.zulipchat.com/#narrow/stream/206238-general/topic/Copy.20on.20write.20based.20instance.20reuse/near/266657772
This commit is contained in:
Chris Fallin
2022-01-18 16:42:24 -08:00
parent 90e7cef56c
commit b73ac83c37
26 changed files with 1070 additions and 135 deletions

View File

@@ -89,3 +89,5 @@ all-arch = ["wasmtime-cranelift/all-arch"]
# It is useful for applications that do not bind their own exception ports and
# need portable signal handling.
posix-signals-on-macos = ["wasmtime-runtime/posix-signals-on-macos"]
memfd-allocator = ["wasmtime-runtime/memfd-allocator", "pooling-allocator"]

View File

@@ -7,7 +7,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
#[cfg(feature = "cache")]
use wasmtime_cache::CacheConfig;
use wasmtime_runtime::{debug_builtins, InstanceAllocator};
use wasmtime_runtime::{debug_builtins, CompiledModuleIdAllocator, InstanceAllocator};
/// An `Engine` which is a global context for compilation and management of wasm
/// modules.
@@ -43,6 +43,7 @@ struct EngineInner {
allocator: Box<dyn InstanceAllocator>,
signatures: SignatureRegistry,
epoch: AtomicU64,
unique_id_allocator: CompiledModuleIdAllocator,
}
impl Engine {
@@ -68,6 +69,7 @@ impl Engine {
allocator,
signatures: registry,
epoch: AtomicU64::new(0),
unique_id_allocator: CompiledModuleIdAllocator::new(),
}),
})
}
@@ -153,6 +155,10 @@ impl Engine {
self.inner.epoch.fetch_add(1, Ordering::Relaxed);
}
/// Returns a reference to this engine's `CompiledModuleIdAllocator`.
///
/// The allocator is created once alongside the rest of the engine's inner
/// state and is handed to `CompiledModule::from_artifacts` whenever compiled
/// modules are built for this engine.
pub(crate) fn unique_id_allocator(&self) -> &CompiledModuleIdAllocator {
&self.inner.unique_id_allocator
}
/// Ahead-of-time (AOT) compiles a WebAssembly module.
///
/// The `bytes` provided must be in one of two formats:

View File

@@ -651,7 +651,7 @@ impl<'a> Instantiator<'a> {
artifacts,
modules,
&self.cur.modules,
);
)?;
self.cur.modules.push(submodule);
}
@@ -707,6 +707,7 @@ impl<'a> Instantiator<'a> {
.allocator()
.allocate(InstanceAllocationRequest {
module: compiled_module.module().clone(),
memfds: self.cur.module.memfds().clone(),
image_base: compiled_module.code().as_ptr() as usize,
functions: compiled_module.functions(),
imports: self.cur.build(),

View File

@@ -11,6 +11,7 @@ use std::sync::Arc;
use wasmparser::{Parser, ValidPayload, Validator};
use wasmtime_environ::{ModuleEnvironment, ModuleIndex, PrimaryMap};
use wasmtime_jit::{CompiledModule, CompiledModuleInfo, MmapVec, TypeTables};
use wasmtime_runtime::ModuleMemFds;
mod registry;
mod serialization;
@@ -107,6 +108,8 @@ struct ModuleInner {
types: Arc<TypeTables>,
/// Registered shared signature for the module.
signatures: Arc<SignatureCollection>,
/// A set of memfd images for memories, if any.
memfds: Option<Arc<ModuleMemFds>>,
}
impl Module {
@@ -336,7 +339,12 @@ impl Module {
};
let modules = engine.run_maybe_parallel(artifacts, |(a, b)| {
CompiledModule::from_artifacts(a, b, &*engine.config().profiler)
CompiledModule::from_artifacts(
a,
b,
&*engine.config().profiler,
engine.unique_id_allocator(),
)
})?;
Self::from_parts(engine, modules, main_module, Arc::new(types), &[])
@@ -523,6 +531,8 @@ impl Module {
})
.collect::<Result<Vec<_>>>()?;
let memfds = ModuleMemFds::new(module.module(), module.wasm_data())?;
return Ok(Self {
inner: Arc::new(ModuleInner {
engine: engine.clone(),
@@ -531,6 +541,7 @@ impl Module {
artifact_upvars: modules,
module_upvars,
signatures,
memfds,
}),
});
@@ -543,11 +554,14 @@ impl Module {
module_upvars: &[serialization::SerializedModuleUpvar],
signatures: &Arc<SignatureCollection>,
) -> Result<Module> {
let module = artifacts[module_index].clone();
let memfds = ModuleMemFds::new(module.module(), module.wasm_data())?;
Ok(Module {
inner: Arc::new(ModuleInner {
engine: engine.clone(),
types: types.clone(),
module: artifacts[module_index].clone(),
module,
memfds,
artifact_upvars: artifact_upvars
.iter()
.map(|i| artifacts[*i].clone())
@@ -666,12 +680,15 @@ impl Module {
artifact_upvars: &[usize],
module_upvars: &[wasmtime_environ::ModuleUpvar],
modules: &PrimaryMap<ModuleIndex, Module>,
) -> Module {
Module {
) -> Result<Module> {
let module = self.inner.artifact_upvars[artifact_index].clone();
let memfds = ModuleMemFds::new(module.module(), module.wasm_data())?;
Ok(Module {
inner: Arc::new(ModuleInner {
types: self.inner.types.clone(),
engine: self.inner.engine.clone(),
module: self.inner.artifact_upvars[artifact_index].clone(),
module,
memfds,
artifact_upvars: artifact_upvars
.iter()
.map(|i| self.inner.artifact_upvars[*i].clone())
@@ -687,7 +704,7 @@ impl Module {
.collect(),
signatures: self.inner.signatures.clone(),
}),
}
})
}
pub(crate) fn compiled_module(&self) -> &Arc<CompiledModule> {
@@ -706,6 +723,10 @@ impl Module {
&self.inner.signatures
}
/// Returns the memfd images stored for this module's memories, or `None`
/// if the module has none.
///
/// The value is the one produced by `ModuleMemFds::new` when the module was
/// constructed; the instantiator clones it into each
/// `InstanceAllocationRequest`.
pub(crate) fn memfds(&self) -> &Option<Arc<ModuleMemFds>> {
&self.inner.memfds
}
/// Looks up the module upvar value at the `index` specified.
///
/// Note that this panics if `index` is out of bounds since this should

View File

@@ -274,7 +274,12 @@ impl<'a> SerializedModule<'a> {
pub fn into_module(self, engine: &Engine) -> Result<Module> {
let (main_module, modules, types, upvars) = self.into_parts(engine)?;
let modules = engine.run_maybe_parallel(modules, |(i, m)| {
CompiledModule::from_artifacts(i, m, &*engine.config().profiler)
CompiledModule::from_artifacts(
i,
m,
&*engine.config().profiler,
engine.unique_id_allocator(),
)
})?;
Module::from_parts(engine, modules, main_module, Arc::new(types), &upvars)

View File

@@ -421,11 +421,13 @@ impl<T> Store<T> {
shared_signatures: None.into(),
imports: Default::default(),
module: Arc::new(wasmtime_environ::Module::default()),
memfds: None,
store: StorePtr::empty(),
wasm_data: &[],
})
.expect("failed to allocate default callee")
};
let mut inner = Box::new(StoreInner {
inner: StoreOpaque {
_marker: marker::PhantomPinned,

View File

@@ -41,6 +41,7 @@ fn create_handle(
let handle = OnDemandInstanceAllocator::new(config.mem_creator.clone(), 0).allocate(
InstanceAllocationRequest {
module: Arc::new(module),
memfds: None,
functions,
image_base: 0,
imports,

View File

@@ -161,6 +161,7 @@ pub unsafe fn create_raw_function(
Ok(
OnDemandInstanceAllocator::default().allocate(InstanceAllocationRequest {
module: Arc::new(module),
memfds: None,
functions: &functions,
image_base: (*func).as_ptr() as usize,
imports: Imports::default(),