Enable copy-on-write heap initialization by default (#3825)

* Enable copy-on-write heap initialization by default

This commit enables the `Config::memfd` feature by default now that it's
been fuzzed for a few weeks on oss-fuzz, and will continue to be fuzzed
leading up to the next release of Wasmtime in early March. The
documentation of the `Config` option has been updated, and a CLI flag
has been added to disable the feature.

* Remove ubiquitous "memfd" terminology

Switch instead to forms of "memory image" or "cow" or some combination
thereof.

* Update new option names
This commit is contained in:
Alex Crichton
2022-02-22 17:12:18 -06:00
committed by GitHub
parent 593f8d96aa
commit bbd4a4a500
16 changed files with 294 additions and 275 deletions

View File

@@ -59,7 +59,7 @@ default = [
'parallel-compilation',
'cranelift',
'pooling-allocator',
'memfd',
'memory-init-cow',
'vtune',
]
@@ -101,10 +101,10 @@ all-arch = ["wasmtime-cranelift/all-arch"]
# need portable signal handling.
posix-signals-on-macos = ["wasmtime-runtime/posix-signals-on-macos"]
# Enables, on Linux, the usage of memfd mappings to enable instantiation to use
# copy-on-write to initialize linear memory for wasm modules which have
# compatible linear memories.
# Enables, on supported platforms, the usage of copy-on-write initialization of
# compatible linear memories. For more information see the documentation of
# `Config::memory_init_cow`.
#
# Enabling this feature has no effect on non-Linux platforms or when the `uffd`
# feature is enabled.
memfd = ["wasmtime-runtime/memfd"]
# Enabling this feature has no effect on unsupported platforms or when the
# `uffd` feature is enabled.
memory-init-cow = ["wasmtime-runtime/memory-init-cow"]

View File

@@ -104,8 +104,8 @@ pub struct Config {
pub(crate) module_version: ModuleVersionStrategy,
pub(crate) parallel_compilation: bool,
pub(crate) paged_memory_initialization: bool,
pub(crate) memfd: bool,
pub(crate) memfd_guaranteed_dense_image_size: u64,
pub(crate) memory_init_cow: bool,
pub(crate) memory_guaranteed_dense_image_size: u64,
}
impl Config {
@@ -131,8 +131,8 @@ impl Config {
parallel_compilation: true,
// Default to paged memory initialization when using uffd on linux
paged_memory_initialization: cfg!(all(target_os = "linux", feature = "uffd")),
memfd: false,
memfd_guaranteed_dense_image_size: 16 << 20,
memory_init_cow: true,
memory_guaranteed_dense_image_size: 16 << 20,
};
#[cfg(compiler)]
{
@@ -1178,71 +1178,91 @@ impl Config {
self
}
/// Configures whether `memfd`, if supported, will be used to initialize
/// applicable module memories.
/// Configures whether copy-on-write memory-mapped data is used to
/// initialize a linear memory.
///
/// This is a Linux-specific feature since `memfd` is only supported on
/// Linux. Support for this is also enabled by default at compile time but
/// is otherwise disabled at runtime by default. This feature needs to be
/// enabled to `true` for support to be used.
/// Initializing linear memory via a copy-on-write mapping can drastically
/// reduce instantiation costs of a WebAssembly module because copying
/// memory is deferred. Additionally, if a page of memory is only ever read
/// from WebAssembly and never written to, then the same underlying page of
/// data will be reused between all instantiations of a module, meaning that
/// if a module is instantiated many times this can lower the overall memory
/// required to run that module.
///
/// Also note that even if this feature is enabled it may not be applicable
/// to all memories in all wasm modules. At this time memories must meet
/// specific criteria to be memfd-initialized:
/// This feature is only applicable when a WebAssembly module meets specific
/// criteria to be initialized in this fashion, such as:
///
/// * Only memories defined in the module can be initialized this way.
/// * Data segments for memory must use statically known offsets.
/// * Data segments for memory must all be in-bounds.
///
/// If all of the above applies, this setting is enabled, and the current
/// platform is Linux the `memfd` will be used to efficiently initialize
/// linear memories with `mmap` to avoid copying data from initializers into
/// linear memory.
#[cfg(feature = "memfd")]
#[cfg_attr(nightlydoc, doc(cfg(feature = "memfd")))]
pub fn memfd(&mut self, memfd: bool) -> &mut Self {
self.memfd = memfd;
/// Modules which do not meet these criteria will fall back to
/// initialization of linear memory based on copying memory.
///
/// This feature of Wasmtime is also platform-specific:
///
/// * Linux - this feature is supported for all instances of [`Module`].
/// Modules backed by an existing mmap (such as those created by
/// [`Module::deserialize_file`]) will reuse that mmap to cow-initialize
/// memory. Other instances of [`Module`] may use the `memfd_create`
/// syscall to create an initialization image to `mmap`.
/// * Unix (not Linux) - this feature is only supported when loading modules
/// from a precompiled file via [`Module::deserialize_file`] where there
/// is a file descriptor to use to map data into the process. Note that
/// the module must have been compiled with this setting enabled as well.
/// * Windows - there is no support for this feature at this time. Memory
/// initialization will always copy bytes.
///
/// By default this option is enabled.
///
/// [`Module::deserialize_file`]: crate::Module::deserialize_file
/// [`Module`]: crate::Module
#[cfg(feature = "memory-init-cow")]
#[cfg_attr(nightlydoc, doc(cfg(feature = "memory-init-cow")))]
pub fn memory_init_cow(&mut self, enable: bool) -> &mut Self {
self.memory_init_cow = enable;
self
}
/// Configures the "guaranteed dense image size" for memfd.
/// Configures the "guaranteed dense image size" for copy-on-write
/// initialized memories.
///
/// When using the memfd feature to initialize memory efficiently,
/// compiled modules contain an image of the module's initial
/// heap. If the module has a fairly sparse initial heap, with
/// just a few data segments at very different offsets, this could
/// result in a large region of zero bytes in the image. In other
/// words, it's not very memory-efficient.
/// When using the [`Config::memory_init_cow`] feature to initialize memory
/// efficiently (which is enabled by default), compiled modules contain an
/// image of the module's initial heap. If the module has a fairly sparse
/// initial heap, with just a few data segments at very different offsets,
/// this could result in a large region of zero bytes in the image. In
/// other words, it's not very memory-efficient.
///
/// We normally use a heuristic to avoid this: if less than half
/// of the initialized range (first non-zero to last non-zero
/// byte) of any memory in the module has pages with nonzero
/// bytes, then we avoid memfd for the entire module.
/// bytes, then we avoid creating a memory image for the entire module.
///
/// However, if the embedder always needs the instantiation-time
/// efficiency of memfd, and is otherwise carefully controlling
/// parameters of the modules (for example, by limiting the
/// maximum heap size of the modules), then it may be desirable to
/// ensure memfd is used even if this could go against the
/// heuristic above. Thus, we add another condition: there is a
/// size of initialized data region up to which we *always* allow
/// memfd. The embedder can set this to a known maximum heap size
/// if they desire to always get the benefits of memfd.
/// However, if the embedder always needs the instantiation-time efficiency
/// of copy-on-write initialization, and is otherwise carefully controlling
/// parameters of the modules (for example, by limiting the maximum heap
/// size of the modules), then it may be desirable to ensure a memory image
/// is created even if this could go against the heuristic above. Thus, we
/// add another condition: there is a size of initialized data region up to
/// which we *always* allow a memory image. The embedder can set this to a
/// known maximum heap size if they desire to always get the benefits of
/// copy-on-write images.
///
/// In the future we may implement a "best of both worlds"
/// solution where we have a dense image up to some limit, and
/// then support a sparse list of initializers beyond that; this
/// would get most of the benefit of memfd and pay the incremental
/// would get most of the benefit of copy-on-write and pay the incremental
/// cost of eager initialization only for those bits of memory
/// that are out-of-bounds. However, for now, an embedder desiring
/// fast instantiation should ensure that this setting is as large
/// as the maximum module initial memory content size.
///
/// By default this value is 16 MiB.
#[cfg(feature = "memfd")]
#[cfg_attr(nightlydoc, doc(cfg(feature = "memfd")))]
pub fn memfd_guaranteed_dense_image_size(&mut self, size_in_bytes: u64) -> &mut Self {
self.memfd_guaranteed_dense_image_size = size_in_bytes;
#[cfg(feature = "memory-init-cow")]
#[cfg_attr(nightlydoc, doc(cfg(feature = "memory-init-cow")))]
pub fn memory_guaranteed_dense_image_size(&mut self, size_in_bytes: u64) -> &mut Self {
self.memory_guaranteed_dense_image_size = size_in_bytes;
self
}
@@ -1315,8 +1335,8 @@ impl Clone for Config {
module_version: self.module_version.clone(),
parallel_compilation: self.parallel_compilation,
paged_memory_initialization: self.paged_memory_initialization,
memfd: self.memfd,
memfd_guaranteed_dense_image_size: self.memfd_guaranteed_dense_image_size,
memory_init_cow: self.memory_init_cow,
memory_guaranteed_dense_image_size: self.memory_guaranteed_dense_image_size,
}
}
}

View File

@@ -281,14 +281,14 @@
//! efficient reuse of resources for high-concurrency and
//! high-instantiation-count scenarios.
//!
//! * `memfd` - Enabled by default, this feature builds in support for a
//! Linux-specific feature of creating a `memfd` where applicable for a
//! [`Module`]'s initial memory. This makes instantiation much faster by
//! * `memory-init-cow` - Enabled by default, this feature builds in support
//! for, on supported platforms, initializing wasm linear memories with
//! copy-on-write heap mappings. This makes instantiation much faster by
//! `mmap`-ing the initial memory image into place instead of copying memory
//! into place, allowing sharing pages that end up only getting read and
//! otherwise using copy-on-write for efficient initialization of memory. Note
//! into place, allowing sharing pages that end up only getting read. Note
//! that this is simply compile-time support and this must also be enabled at
//! run-time via [`Config::memfd`].
//! run-time via [`Config::memory_init_cow`] (which is also enabled by
//! default).
//!
//! ## Examples
//!

View File

@@ -16,7 +16,7 @@ use wasmtime_environ::{
};
use wasmtime_jit::{CompiledModule, CompiledModuleInfo, TypeTables};
use wasmtime_runtime::{
CompiledModuleId, MemoryMemFd, MmapVec, ModuleMemFds, VMSharedSignatureIndex,
CompiledModuleId, MemoryImage, MmapVec, ModuleMemoryImages, VMSharedSignatureIndex,
};
mod registry;
@@ -114,11 +114,10 @@ struct ModuleInner {
types: Arc<TypeTables>,
/// Registered shared signature for the module.
signatures: Arc<SignatureCollection>,
/// A set of memfd images for memories, if any. Note that module
/// instantiation (hence the need for lazy init) may happen for
/// the same module concurrently in multiple Stores, so we use a
/// OnceCell.
memfds: OnceCell<Option<ModuleMemFds>>,
/// A set of initialization images for memories, if any. Note that module
/// instantiation (hence the need for lazy init) may happen for the same
/// module concurrently in multiple Stores, so we use a OnceCell.
memory_images: OnceCell<Option<ModuleMemoryImages>>,
}
impl Module {
@@ -430,9 +429,9 @@ impl Module {
// can either at runtime be implemented as a single memcpy to
// initialize memory or otherwise enabling virtual-memory-tricks
// such as mmap'ing from a file to get copy-on-write.
if engine.config().memfd {
if engine.config().memory_init_cow {
let align = engine.compiler().page_size_align();
let max_always_allowed = engine.config().memfd_guaranteed_dense_image_size;
let max_always_allowed = engine.config().memory_guaranteed_dense_image_size;
translation.try_static_init(align, max_always_allowed);
}
@@ -575,7 +574,7 @@ impl Module {
artifact_upvars: modules,
module_upvars,
signatures,
memfds: OnceCell::new(),
memory_images: OnceCell::new(),
}),
});
@@ -594,7 +593,7 @@ impl Module {
engine: engine.clone(),
types: types.clone(),
module,
memfds: OnceCell::new(),
memory_images: OnceCell::new(),
artifact_upvars: artifact_upvars
.iter()
.map(|i| artifacts[*i].clone())
@@ -720,7 +719,7 @@ impl Module {
types: self.inner.types.clone(),
engine: self.inner.engine.clone(),
module,
memfds: OnceCell::new(),
memory_images: OnceCell::new(),
artifact_upvars: artifact_upvars
.iter()
.map(|i| self.inner.artifact_upvars[*i].clone())
@@ -1032,21 +1031,21 @@ impl wasmtime_runtime::ModuleRuntimeInfo for ModuleInner {
self.module.func_info(index)
}
fn memfd_image(&self, memory: DefinedMemoryIndex) -> Result<Option<&Arc<MemoryMemFd>>> {
if !self.engine.config().memfd {
fn memory_image(&self, memory: DefinedMemoryIndex) -> Result<Option<&Arc<MemoryImage>>> {
if !self.engine.config().memory_init_cow {
return Ok(None);
}
let memfds = self.memfds.get_or_try_init(|| {
ModuleMemFds::new(
let images = self.memory_images.get_or_try_init(|| {
ModuleMemoryImages::new(
self.module.module(),
self.module.wasm_data(),
Some(self.module.mmap()),
)
})?;
Ok(memfds
Ok(images
.as_ref()
.and_then(|memfds| memfds.get_memory_image(memory)))
.and_then(|images| images.get_memory_image(memory)))
}
fn unique_id(&self) -> Option<CompiledModuleId> {
@@ -1138,7 +1137,7 @@ impl wasmtime_runtime::ModuleRuntimeInfo for BareModuleInfo {
&self.function_info[index]
}
fn memfd_image(&self, _memory: DefinedMemoryIndex) -> Result<Option<&Arc<MemoryMemFd>>> {
fn memory_image(&self, _memory: DefinedMemoryIndex) -> Result<Option<&Arc<MemoryImage>>> {
Ok(None)
}

View File

@@ -7,7 +7,7 @@ use std::convert::TryFrom;
use std::sync::Arc;
use wasmtime_environ::{EntityIndex, MemoryPlan, MemoryStyle, Module, WASM_PAGE_SIZE};
use wasmtime_runtime::{
MemoryMemFd, RuntimeLinearMemory, RuntimeMemoryCreator, VMMemoryDefinition,
MemoryImage, RuntimeLinearMemory, RuntimeMemoryCreator, VMMemoryDefinition,
};
pub fn create_memory(store: &mut StoreOpaque, memory: &MemoryType) -> Result<InstanceId> {
@@ -63,7 +63,7 @@ impl RuntimeMemoryCreator for MemoryCreatorProxy {
plan: &MemoryPlan,
minimum: usize,
maximum: Option<usize>,
_: Option<&Arc<MemoryMemFd>>,
_: Option<&Arc<MemoryImage>>,
) -> Result<Box<dyn RuntimeLinearMemory>> {
let ty = MemoryType::from_wasmtime_memory(&plan.memory);
let reserved_size_in_bytes = match plan.style {