Expose details for mlocking modules externally (#3944)

This commit exposes various details and config options for
finer-grained control over mlock-ing the memory of modules. It consists
of three distinct changes:

* A new `Module::image_range` API is added to expose the range in host
  memory where the compiled image resides. This enables embedders to
  make mlock-ing decisions independently of Wasmtime. Beyond that,
  there is not much else useful that can be done with this range
  information at this time.

* A new `Config::force_memory_init_memfd` option has been added. This
  option forces the use of `memfd_create` on Linux even when the
  original module comes from a file on disk. The main purpose of
  mlock-ing for Wasmtime is likely to be avoiding major page faults
  that go back to disk, and this option removes another major source of
  such page faults by ensuring that the initialization contents of
  memory are always in RAM.

* The `memory_images` field of a `Module` has gone back to being lazily
  created on the first instantiation, effectively reverting #3914. This
  enables embedders to defer the creation of the image to as late as
  possible to allow modules to be created from precompiled images
  without actually loading all the contents of the data segments from
  disk immediately.

These changes are all somewhat low-level controls which aren't intended
to be generally used by embedders. If fine-grained control is desired,
though, it's hoped that these knobs provide what's necessary to
achieve it.
This commit is contained in:
Alex Crichton
2022-03-18 13:51:55 -05:00
committed by GitHub
parent e92cbfb283
commit 41594dc5d9
3 changed files with 84 additions and 12 deletions

View File

@@ -682,6 +682,14 @@ impl CompiledModule {
pub fn has_address_map(&self) -> bool { pub fn has_address_map(&self) -> bool {
!self.address_map_data().is_empty() !self.address_map_data().is_empty()
} }
/// Reports the span of host memory occupied by this module's compiled
/// image, as a half-open range of addresses.
pub fn image_range(&self) -> Range<usize> {
    let mmap = self.mmap();
    let start = mmap.as_ptr() as usize;
    let end = start + mmap.len();
    start..end
}
} }
type Addr2LineContext<'a> = addr2line::Context<gimli::EndianSlice<'a, gimli::LittleEndian>>; type Addr2LineContext<'a> = addr2line::Context<gimli::EndianSlice<'a, gimli::LittleEndian>>;

View File

@@ -100,6 +100,7 @@ pub struct Config {
pub(crate) paged_memory_initialization: bool, pub(crate) paged_memory_initialization: bool,
pub(crate) memory_init_cow: bool, pub(crate) memory_init_cow: bool,
pub(crate) memory_guaranteed_dense_image_size: u64, pub(crate) memory_guaranteed_dense_image_size: u64,
pub(crate) force_memory_init_memfd: bool,
} }
impl Config { impl Config {
@@ -135,6 +136,7 @@ impl Config {
paged_memory_initialization: cfg!(all(target_os = "linux", feature = "uffd")), paged_memory_initialization: cfg!(all(target_os = "linux", feature = "uffd")),
memory_init_cow: true, memory_init_cow: true,
memory_guaranteed_dense_image_size: 16 << 20, memory_guaranteed_dense_image_size: 16 << 20,
force_memory_init_memfd: false,
}; };
#[cfg(compiler)] #[cfg(compiler)]
{ {
@@ -1208,6 +1210,33 @@ impl Config {
self self
} }
/// Forces the initial memory image of a module to be backed by an
/// in-memory file created with `memfd_create` on Linux.
///
/// When [`Config::memory_init_cow`] is enabled, which it is by default,
/// module memory initialization images are taken from a module's original
/// mmap if possible. If a precompiled module was loaded from disk this
/// means that the file on disk is used as the mmap source for the initial
/// linear memory contents. Enabling this option forces, on Linux, a new
/// in-memory file to be created with `memfd_create` to hold the contents
/// of the initial image instead of using the original file on disk.
///
/// This can be used to avoid loading the contents of memory from disk
/// through a page fault. With `memfd_create` the contents of memory are
/// always in RAM, meaning that even the page faults which initially
/// populate a wasm linear memory will only work with RAM instead of ever
/// hitting the disk that the original precompiled module is stored on.
///
/// This option is disabled by default.
#[cfg(feature = "memory-init-cow")]
#[cfg_attr(nightlydoc, doc(cfg(feature = "memory-init-cow")))]
pub fn force_memory_init_memfd(&mut self, enable: bool) -> &mut Self {
self.force_memory_init_memfd = enable;
self
}
/// Configures the "guaranteed dense image size" for copy-on-write /// Configures the "guaranteed dense image size" for copy-on-write
/// initialized memories. /// initialized memories.
/// ///
@@ -1330,6 +1359,7 @@ impl Clone for Config {
paged_memory_initialization: self.paged_memory_initialization, paged_memory_initialization: self.paged_memory_initialization,
memory_init_cow: self.memory_init_cow, memory_init_cow: self.memory_init_cow,
memory_guaranteed_dense_image_size: self.memory_guaranteed_dense_image_size, memory_guaranteed_dense_image_size: self.memory_guaranteed_dense_image_size,
force_memory_init_memfd: self.force_memory_init_memfd,
} }
} }
} }

View File

@@ -4,8 +4,10 @@ use crate::{
}; };
use crate::{Engine, ModuleType}; use crate::{Engine, ModuleType};
use anyhow::{bail, Context, Result}; use anyhow::{bail, Context, Result};
use once_cell::sync::OnceCell;
use std::fs; use std::fs;
use std::mem; use std::mem;
use std::ops::Range;
use std::path::Path; use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use wasmparser::{Parser, ValidPayload, Validator}; use wasmparser::{Parser, ValidPayload, Validator};
@@ -114,7 +116,13 @@ struct ModuleInner {
/// Registered shared signature for the module. /// Registered shared signature for the module.
signatures: Arc<SignatureCollection>, signatures: Arc<SignatureCollection>,
/// A set of initialization images for memories, if any. /// A set of initialization images for memories, if any.
memory_images: Option<ModuleMemoryImages>, ///
/// Note that this is stored behind a `OnceCell` so that the image is
/// created lazily. On Linux, where `memfd_create` may be used to create the
/// backing memory image, creation is a fairly expensive operation, so
/// deferring it improves memory usage for modules that are created but may
/// never be instantiated.
memory_images: OnceCell<Option<ModuleMemoryImages>>,
} }
impl Module { impl Module {
@@ -554,7 +562,7 @@ impl Module {
&signatures, &signatures,
) )
}) })
.collect::<Result<Vec<_>>>()?; .collect();
return Ok(Self { return Ok(Self {
inner: Arc::new(ModuleInner { inner: Arc::new(ModuleInner {
@@ -563,7 +571,7 @@ impl Module {
artifact_upvars: modules, artifact_upvars: modules,
module_upvars, module_upvars,
signatures, signatures,
memory_images: memory_images(engine, &module)?, memory_images: OnceCell::new(),
module, module,
}), }),
}); });
@@ -576,13 +584,13 @@ impl Module {
artifact_upvars: &[usize], artifact_upvars: &[usize],
module_upvars: &[serialization::SerializedModuleUpvar], module_upvars: &[serialization::SerializedModuleUpvar],
signatures: &Arc<SignatureCollection>, signatures: &Arc<SignatureCollection>,
) -> Result<Module> { ) -> Module {
let module = artifacts[module_index].clone(); let module = artifacts[module_index].clone();
Ok(Module { Module {
inner: Arc::new(ModuleInner { inner: Arc::new(ModuleInner {
engine: engine.clone(), engine: engine.clone(),
types: types.clone(), types: types.clone(),
memory_images: memory_images(engine, &module)?, memory_images: OnceCell::new(),
module, module,
artifact_upvars: artifact_upvars artifact_upvars: artifact_upvars
.iter() .iter()
@@ -601,10 +609,10 @@ impl Module {
signatures, signatures,
) )
}) })
.collect::<Result<Vec<_>>>()?, .collect(),
signatures: signatures.clone(), signatures: signatures.clone(),
}), }),
}) }
} }
} }
@@ -708,7 +716,7 @@ impl Module {
inner: Arc::new(ModuleInner { inner: Arc::new(ModuleInner {
types: self.inner.types.clone(), types: self.inner.types.clone(),
engine: self.inner.engine.clone(), engine: self.inner.engine.clone(),
memory_images: memory_images(&self.inner.engine, &module)?, memory_images: OnceCell::new(),
module, module,
artifact_upvars: artifact_upvars artifact_upvars: artifact_upvars
.iter() .iter()
@@ -969,6 +977,24 @@ impl Module {
// statically cast the &Arc<ModuleInner> to &Arc<dyn Trait...>. // statically cast the &Arc<ModuleInner> to &Arc<dyn Trait...>.
self.inner.clone() self.inner.clone()
} }
/// Returns the range of bytes in memory where this module's compilation
/// image resides.
///
/// The compilation image for a module contains executable code, data, debug
/// information, etc. This is roughly, though not exactly, the same as the
/// output of `Module::serialize`.
///
/// The range of memory reported here is exposed to allow low-level
/// manipulation of the memory in platform-specific manners, such as using
/// `mlock` to force the contents to be paged in immediately or to keep them
/// paged in after they're loaded.
///
/// It is not safe to modify the memory in this range, nor is it safe to
/// modify the protections of memory in this range.
pub fn image_range(&self) -> Range<usize> {
self.compiled_module().image_range()
}
} }
fn _assert_send_sync() { fn _assert_send_sync() {
@@ -1022,8 +1048,10 @@ impl wasmtime_runtime::ModuleRuntimeInfo for ModuleInner {
} }
fn memory_image(&self, memory: DefinedMemoryIndex) -> Result<Option<&Arc<MemoryImage>>> { fn memory_image(&self, memory: DefinedMemoryIndex) -> Result<Option<&Arc<MemoryImage>>> {
Ok(self let images = self
.memory_images .memory_images
.get_or_try_init(|| memory_images(&self.engine, &self.module))?;
Ok(images
.as_ref() .as_ref()
.and_then(|images| images.get_memory_image(memory))) .and_then(|images| images.get_memory_image(memory)))
} }
@@ -1146,6 +1174,12 @@ fn memory_images(engine: &Engine, module: &CompiledModule) -> Result<Option<Modu
return Ok(None); return Ok(None);
} }
// ... otherwise logic is delegated to the `ModuleMemoryImages::new` constructor // ... otherwise logic is delegated to the `ModuleMemoryImages::new`
ModuleMemoryImages::new(module.module(), module.wasm_data(), Some(module.mmap())) // constructor.
let mmap = if engine.config().force_memory_init_memfd {
None
} else {
Some(module.mmap())
};
ModuleMemoryImages::new(module.module(), module.wasm_data(), mmap)
} }