From 41594dc5d9fcd67a8da67a05559f5e45b5d310f3 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 18 Mar 2022 13:51:55 -0500 Subject: [PATCH] Expose details for mlocking modules externally (#3944) This commit exposes various details and config options for finer-grained control over mlock-ing the memory of modules. This amounts to three changes in this commit: * A new `Module::image_range` API is added to expose the range in host memory of where the compiled image resides. This enables embedders to make mlock-ing decisions independently of Wasmtime. Otherwise, there is not much else useful that can be done with this range information at this time. * A new `Config::force_memory_init_memfd` option has been added. This option is used to force the usage of `memfd_create` on Linux even when the original module comes from a file on disk. With mlock-ing, the main purpose for Wasmtime is likely to be avoiding major page faults that go back to disk, so this option is another major way of avoiding such page faults: it ensures that the initialization contents of memory are always in RAM. * The `memory_images` field of a `Module` has gone back to being lazily created on the first instantiation, effectively reverting #3914. This enables embedders to defer the creation of the image until as late as possible, allowing modules to be created from precompiled images without immediately loading all the contents of the data segments from disk. These changes are all somewhat low-level controls which aren't intended to be generally used by embedders. If fine-grained control is desired, though, it's hoped that these knobs provide what's necessary to achieve it. 
--- crates/jit/src/instantiate.rs | 8 +++++ crates/wasmtime/src/config.rs | 30 ++++++++++++++++++ crates/wasmtime/src/module.rs | 58 +++++++++++++++++++++++++++-------- 3 files changed, 84 insertions(+), 12 deletions(-) diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index 36d13033d5..76624260d3 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -682,6 +682,14 @@ impl CompiledModule { pub fn has_address_map(&self) -> bool { !self.address_map_data().is_empty() } + + /// Returns the bounds, in host memory, of where this module's compiled + /// image resides. + pub fn image_range(&self) -> Range { + let base = self.mmap().as_ptr() as usize; + let len = self.mmap().len(); + base..base + len + } } type Addr2LineContext<'a> = addr2line::Context>; diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index ed01cca812..646a795497 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -100,6 +100,7 @@ pub struct Config { pub(crate) paged_memory_initialization: bool, pub(crate) memory_init_cow: bool, pub(crate) memory_guaranteed_dense_image_size: u64, + pub(crate) force_memory_init_memfd: bool, } impl Config { @@ -135,6 +136,7 @@ impl Config { paged_memory_initialization: cfg!(all(target_os = "linux", feature = "uffd")), memory_init_cow: true, memory_guaranteed_dense_image_size: 16 << 20, + force_memory_init_memfd: false, }; #[cfg(compiler)] { @@ -1208,6 +1210,33 @@ impl Config { self } + /// A configuration option to force the usage of `memfd_create` on Linux to + /// be used as the backing source for a module's initial memory image. + /// + /// When [`Config::memory_init_cow`] is enabled, which is enabled by + /// default, module memory initialization images are taken from a module's + /// original mmap if possible. If a precompiled module was loaded from disk + /// this means that the disk's file is used as an mmap source for the + /// initial linear memory contents. 
This option can be used to force, on + /// Linux, that instead of using the original file on disk a new in-memory + /// file is created with `memfd_create` to hold the contents of the initial + /// image. + /// + /// This option can be used to avoid possibly loading the contents of memory + /// from disk through a page fault. Instead with `memfd_create` the contents + /// of memory are always in RAM, meaning that even page faults which + /// initially populate a wasm linear memory will only work with RAM instead + /// of ever hitting the disk that the original precompiled module is stored + /// on. + /// + /// This option is disabled by default. + #[cfg(feature = "memory-init-cow")] + #[cfg_attr(nightlydoc, doc(cfg(feature = "memory-init-cow")))] + pub fn force_memory_init_memfd(&mut self, enable: bool) -> &mut Self { + self.force_memory_init_memfd = enable; + self + } + /// Configures the "guaranteed dense image size" for copy-on-write /// initialized memories. /// @@ -1330,6 +1359,7 @@ impl Clone for Config { paged_memory_initialization: self.paged_memory_initialization, memory_init_cow: self.memory_init_cow, memory_guaranteed_dense_image_size: self.memory_guaranteed_dense_image_size, + force_memory_init_memfd: self.force_memory_init_memfd, } } } diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index f47fc3324b..d5da545b25 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -4,8 +4,10 @@ use crate::{ }; use crate::{Engine, ModuleType}; use anyhow::{bail, Context, Result}; +use once_cell::sync::OnceCell; use std::fs; use std::mem; +use std::ops::Range; use std::path::Path; use std::sync::Arc; use wasmparser::{Parser, ValidPayload, Validator}; @@ -114,7 +116,13 @@ struct ModuleInner { /// Registered shared signature for the module. signatures: Arc, /// A set of initialization images for memories, if any. - memory_images: Option, + /// + /// Note that this is behind a `OnceCell` to lazily create this image. 
On + /// Linux where `memfd_create` may be used to create the backing memory + /// image this is a pretty expensive operation, so by deferring it this + /// improves memory usage for modules that are created but may not ever be + /// instantiated. + memory_images: OnceCell>, } impl Module { @@ -554,7 +562,7 @@ impl Module { &signatures, ) }) - .collect::>>()?; + .collect(); return Ok(Self { inner: Arc::new(ModuleInner { @@ -563,7 +571,7 @@ impl Module { artifact_upvars: modules, module_upvars, signatures, - memory_images: memory_images(engine, &module)?, + memory_images: OnceCell::new(), module, }), }); @@ -576,13 +584,13 @@ impl Module { artifact_upvars: &[usize], module_upvars: &[serialization::SerializedModuleUpvar], signatures: &Arc, - ) -> Result { + ) -> Module { let module = artifacts[module_index].clone(); - Ok(Module { + Module { inner: Arc::new(ModuleInner { engine: engine.clone(), types: types.clone(), - memory_images: memory_images(engine, &module)?, + memory_images: OnceCell::new(), module, artifact_upvars: artifact_upvars .iter() @@ -601,10 +609,10 @@ impl Module { signatures, ) }) - .collect::>>()?, + .collect(), signatures: signatures.clone(), }), - }) + } } } @@ -708,7 +716,7 @@ impl Module { inner: Arc::new(ModuleInner { types: self.inner.types.clone(), engine: self.inner.engine.clone(), - memory_images: memory_images(&self.inner.engine, &module)?, + memory_images: OnceCell::new(), module, artifact_upvars: artifact_upvars .iter() @@ -969,6 +977,24 @@ impl Module { // statically cast the &Arc to &Arc. self.inner.clone() } + + /// Returns the range of bytes in memory where this module's compilation + /// image resides. + /// + /// The compilation image for a module contains executable code, data, debug + /// information, etc. This is roughly the same as the `Module::serialize` + /// but not the exact same. 
+ /// + /// The range of memory reported here is exposed to allow low-level + /// manipulation of the memory in platform-specific manners such as using + /// `mlock` to force the contents to be paged in immediately or keep them + /// paged in after they're loaded. + /// + /// It is not safe to modify the memory in this range, nor is it safe to + /// modify the protections of memory in this range. + pub fn image_range(&self) -> Range { + self.compiled_module().image_range() + } } fn _assert_send_sync() { @@ -1022,8 +1048,10 @@ impl wasmtime_runtime::ModuleRuntimeInfo for ModuleInner { } fn memory_image(&self, memory: DefinedMemoryIndex) -> Result>> { - Ok(self + let images = self .memory_images + .get_or_try_init(|| memory_images(&self.engine, &self.module))?; + Ok(images .as_ref() .and_then(|images| images.get_memory_image(memory))) } @@ -1146,6 +1174,12 @@ fn memory_images(engine: &Engine, module: &CompiledModule) -> Result