From b73ac83c375f953e9433021343f3c85f15beff58 Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Tue, 18 Jan 2022 16:42:24 -0800
Subject: [PATCH] Add a pooling allocator mode based on copy-on-write mappings of memfds.

As first suggested by Jan on the Zulip here [1], a cheap and effective way to obtain copy-on-write semantics of a "backing image" for a Wasm memory is to mmap a file with `MAP_PRIVATE`. The `memfd` mechanism provided by the Linux kernel allows us to create anonymous, in-memory-only files that we can use for this mapping, so we can construct the image contents on the fly and then create a CoW overlay on top of that image. Furthermore, and importantly, `madvise(MADV_DONTNEED, ...)` will discard the CoW overlay, returning the mapping to its original state.

By itself this is almost enough for a very fast instantiation-termination loop of the same image over and over, without changing the address-space mapping at all (which is expensive). The only missing bit is how to implement heap *growth*. But here memfds can help us again: if we create another anonymous file and map it where the extended parts of the heap would go, we can take advantage of the fact that an `mmap()` mapping can be *larger than the file itself*, with accesses beyond the end generating a `SIGBUS`, and of the fact that we can cheaply resize the file with `ftruncate`, even after a mapping exists. So we can map the "heap extension" file once with the maximum memory-slot size and grow the memfd itself as `memory.grow` operations occur.

Together, the CoW technique and the heap-growth technique give us a fast path consisting of only `madvise()` and `ftruncate()` when we re-instantiate the same module over and over, as long as we can reuse the same slot. This fast path avoids all whole-process address-space locks in the Linux kernel, which should mean it is highly scalable. It also avoids the cost of copying data when servicing page faults, as the `uffd` heap backend must do; the kernel's own optimized CoW logic (the same logic used by all file mmaps) is used instead.
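To make the mechanism concrete, here is a minimal, self-contained sketch of the same idea using raw `libc` calls. This is illustrative only: the patch's actual implementation lives in `crates/runtime/src/memfd.rs` and `crates/runtime/src/instance/allocator/memfd.rs` and uses the `memfd` and `rustix` crates; the sizes, names, and layout below are hypothetical, and error handling is elided.

```rust
// Minimal sketch of the scheme described above, using raw libc calls.
fn main() {
    unsafe {
        let page = 4096usize;
        let image_len = 4 * page; // hypothetical initial heap image size
        let max_heap = 64 * page; // hypothetical maximum heap size

        // 1. Build the heap image in an anonymous, in-memory-only file.
        let name = std::ffi::CString::new("wasm-memory-image").unwrap();
        let image_fd = libc::memfd_create(name.as_ptr(), libc::MFD_CLOEXEC);
        libc::ftruncate(image_fd, image_len as libc::off_t);
        let data = vec![0xAAu8; image_len];
        libc::pwrite(image_fd, data.as_ptr().cast(), image_len, 0);

        // 2. Reserve the whole slot with anonymous memory, then CoW-map the
        //    image over its start. MAP_PRIVATE keeps instance writes in a
        //    private overlay; they never reach the shared image file.
        let base = libc::mmap(
            std::ptr::null_mut(),
            max_heap,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
            -1,
            0,
        );
        assert_ne!(base, libc::MAP_FAILED);
        libc::mmap(
            base,
            image_len,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_FIXED,
            image_fd,
            0,
        );

        // 3. Map an initially-empty "extension" memfd over the growth region.
        //    The mapping is larger than the file, so touching it raises
        //    SIGBUS until the file is ftruncate'd larger.
        let ext_name = std::ffi::CString::new("wasm-heap-extension").unwrap();
        let ext_fd = libc::memfd_create(ext_name.as_ptr(), libc::MFD_CLOEXEC);
        libc::mmap(
            (base as usize + image_len) as *mut libc::c_void,
            max_heap - image_len,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_FIXED,
            ext_fd,
            0,
        );

        // memory.grow fast path: resize the extension file; no mmap needed.
        libc::ftruncate(ext_fd, (2 * page) as libc::off_t);

        // Instance-termination fast path: discard all dirty CoW pages so the
        // slot reads the pristine image again, and shrink the growth region.
        libc::madvise(base, max_heap, libc::MADV_DONTNEED);
        libc::ftruncate(ext_fd, 0);
    }
}
```

Note that the reset and the growth steps change no mappings at all, which is the property the scalability argument above relies on.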
[1] https://bytecodealliance.zulipchat.com/#narrow/stream/206238-general/topic/Copy.20on.20write.20based.20instance.20reuse/near/266657772 --- .github/workflows/main.yml | 5 +- Cargo.lock | 10 + Cargo.toml | 2 + crates/environ/src/module.rs | 23 ++ crates/jit/src/instantiate.rs | 15 +- crates/runtime/Cargo.toml | 3 + crates/runtime/src/instance.rs | 23 ++ crates/runtime/src/instance/allocator.rs | 63 ++-- .../runtime/src/instance/allocator/memfd.rs | 290 ++++++++++++++++++ .../src/instance/allocator/memfd_disabled.rs | 49 +++ .../runtime/src/instance/allocator/pooling.rs | 260 ++++++++++------ .../src/instance/allocator/pooling/uffd.rs | 1 + crates/runtime/src/lib.rs | 44 +++ crates/runtime/src/memfd.rs | 236 ++++++++++++++ crates/runtime/src/memory.rs | 57 +++- crates/runtime/src/module_id.rs | 28 ++ crates/runtime/src/traphandlers/unix.rs | 14 +- crates/wasmtime/Cargo.toml | 2 + crates/wasmtime/src/engine.rs | 8 +- crates/wasmtime/src/instance.rs | 3 +- crates/wasmtime/src/module.rs | 33 +- crates/wasmtime/src/module/serialization.rs | 7 +- crates/wasmtime/src/store.rs | 2 + crates/wasmtime/src/trampoline.rs | 1 + crates/wasmtime/src/trampoline/func.rs | 1 + src/lib.rs | 25 ++ 26 files changed, 1070 insertions(+), 135 deletions(-) create mode 100644 crates/runtime/src/instance/allocator/memfd.rs create mode 100644 crates/runtime/src/instance/allocator/memfd_disabled.rs create mode 100644 crates/runtime/src/memfd.rs create mode 100644 crates/runtime/src/module_id.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e89e33e165..5e5e0c64d9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -136,6 +136,7 @@ jobs: - run: cargo check -p wasmtime --no-default-features --features async - run: cargo check -p wasmtime --no-default-features --features uffd - run: cargo check -p wasmtime --no-default-features --features pooling-allocator + - run: cargo check -p wasmtime --no-default-features --features memfd-allocator - run: cargo check -p wasmtime --no-default-features --features cranelift - run: cargo check -p wasmtime --no-default-features --features cranelift,wat,async,cache @@ -310,11 +311,13 @@ jobs: env: RUST_BACKTRACE: 1 - # Test uffd functionality on Linux + # Test Linux-specific functionality - run: | cargo test --features uffd -p wasmtime-runtime instance::allocator::pooling cargo test --features uffd -p wasmtime-cli pooling_allocator cargo test --features uffd -p wasmtime-cli wast::Cranelift + cargo test --features memfd-allocator -p wasmtime-cli pooling_allocator + cargo test --features memfd-allocator -p wasmtime-cli wast::Cranelift if: matrix.os == 'ubuntu-latest' && matrix.target == '' env: RUST_BACKTRACE: 1 diff --git a/Cargo.lock b/Cargo.lock index 6362f3b413..dbe67fe9c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1602,6 +1602,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memfd" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6627dc657574b49d6ad27105ed671822be56e0d2547d413bfbf3e8d8fa92e7a" +dependencies = [ + "libc", +] + [[package]] name = "memmap2" version = "0.2.3" @@ -3587,6 +3596,7 @@ dependencies = [ "libc", "log", "mach", + "memfd", "memoffset", "more-asserts", "rand 0.8.3", diff --git a/Cargo.toml b/Cargo.toml index 748cb801da..51c4843fcc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,6 +95,8 @@ vtune = ["wasmtime/vtune"] 
wasi-crypto = ["wasmtime-wasi-crypto"] wasi-nn = ["wasmtime-wasi-nn"] uffd = ["wasmtime/uffd"] +pooling-allocator = ["wasmtime/pooling-allocator"] +memfd-allocator = ["pooling-allocator", "wasmtime/memfd-allocator"] all-arch = ["wasmtime/all-arch"] posix-signals-on-macos = ["wasmtime/posix-signals-on-macos"] diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index 8b05e2eb1c..d941801658 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -95,6 +95,19 @@ impl MemoryPlan { }, } } + + /// Determine whether a data segment (memory initializer) is + /// possibly out-of-bounds. Returns `true` if the initializer has a + /// dynamic location and this question cannot be resolved + /// pre-instantiation; hence, this method's result should not be + /// used to signal an error, only to exit optimized/simple fastpaths. + pub fn initializer_possibly_out_of_bounds(&self, init: &MemoryInitializer) -> bool { + match init.end() { + // Not statically known, so possibly out of bounds (we can't guarantee in-bounds). + None => true, + Some(end) => end > self.memory.minimum * (WASM_PAGE_SIZE as u64), + } + } } /// A WebAssembly linear memory initializer. @@ -113,6 +126,16 @@ pub struct MemoryInitializer { pub data: Range, } +impl MemoryInitializer { + /// If this initializer has a definite, static, non-overflowed end address, return it. + pub fn end(&self) -> Option { + if self.base.is_some() { + return None; + } + self.offset.checked_add(self.data.len() as u64) + } +} + /// The type of WebAssembly linear memory initialization to use for a module. #[derive(Clone, Debug, Serialize, Deserialize)] pub enum MemoryInitialization { diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index cc6a3844d1..6a41160070 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -19,7 +19,10 @@ use wasmtime_environ::{ StackMapInformation, Trampoline, Tunables, WasmFuncType, ELF_WASMTIME_ADDRMAP, ELF_WASMTIME_TRAPS, }; -use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline}; +use wasmtime_runtime::{ + CompiledModuleId, CompiledModuleIdAllocator, GdbJitImageRegistration, InstantiationError, + VMFunctionBody, VMTrampoline, +}; /// This is the name of the section in the final ELF image which contains /// concatenated data segments from the original wasm module. @@ -248,6 +251,8 @@ pub struct CompiledModule { code: Range, code_memory: CodeMemory, dbg_jit_registration: Option, + /// A unique ID used to register this module with the engine. + unique_id: CompiledModuleId, } impl CompiledModule { @@ -271,6 +276,7 @@ impl CompiledModule { mmap: MmapVec, info: Option, profiler: &dyn ProfilingAgent, + id_allocator: &CompiledModuleIdAllocator, ) -> Result> { // Transfer ownership of `obj` to a `CodeMemory` object which will // manage permissions, such as the executable bit. Once it's located @@ -312,6 +318,7 @@ impl CompiledModule { dbg_jit_registration: None, code_memory, meta: info.meta, + unique_id: id_allocator.alloc(), }; ret.register_debug_and_profiling(profiler)?; @@ -333,6 +340,12 @@ impl CompiledModule { Ok(()) } + /// Get this module's unique ID. It is unique with respect to a + /// single allocator (which is ordinarily held on a Wasm engine). + pub fn unique_id(&self) -> CompiledModuleId { + self.unique_id + } + /// Returns the underlying memory which contains the compiled module's /// image. 
pub fn mmap(&self) -> &MmapVec { diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index 827439d1d5..aaef30f677 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -37,6 +37,7 @@ winapi = { version = "0.3.7", features = ["winbase", "memoryapi", "errhandlingap [target.'cfg(target_os = "linux")'.dependencies] userfaultfd = { version = "0.4.1", optional = true } +memfd = { version = "0.4.1", optional = true } [build-dependencies] cc = "1.0" @@ -59,3 +60,5 @@ uffd = ["userfaultfd", "pooling-allocator"] # It is useful for applications that do not bind their own exception ports and # need portable signal handling. posix-signals-on-macos = [] + +memfd-allocator = ["pooling-allocator", "memfd"] diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 2c9487d75e..9c56dfb222 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -97,6 +97,29 @@ pub(crate) struct Instance { #[allow(clippy::cast_ptr_alignment)] impl Instance { + /// Helper for allocators; not a public API. + pub(crate) fn create_raw( + module: &Arc, + wasm_data: &'static [u8], + memories: PrimaryMap, + tables: PrimaryMap, + host_state: Box, + ) -> Instance { + Instance { + module: module.clone(), + offsets: VMOffsets::new(HostPtr, &module), + memories, + tables, + dropped_elements: EntitySet::with_capacity(module.passive_elements.len()), + dropped_data: EntitySet::with_capacity(module.passive_data_map.len()), + host_state, + wasm_data, + vmctx: VMContext { + _marker: std::marker::PhantomPinned, + }, + } + } + /// Helper function to access various locations offset from our `*mut /// VMContext` object. unsafe fn vmctx_plus_offset(&self, offset: u32) -> *mut T { diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs index 82c1eec31e..12fa88ddc8 100644 --- a/crates/runtime/src/instance/allocator.rs +++ b/crates/runtime/src/instance/allocator.rs @@ -4,28 +4,37 @@ use crate::memory::{DefaultMemoryCreator, Memory}; use crate::table::Table; use crate::traphandlers::Trap; use crate::vmcontext::{ - VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMContext, VMGlobalDefinition, - VMSharedSignatureIndex, + VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMGlobalDefinition, VMSharedSignatureIndex, }; +use crate::ModuleMemFds; use crate::Store; use anyhow::Result; use std::alloc; use std::any::Any; use std::convert::TryFrom; -use std::marker; use std::ptr::{self, NonNull}; use std::slice; use std::sync::Arc; use thiserror::Error; use wasmtime_environ::{ - DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, EntitySet, FunctionInfo, - GlobalInit, HostPtr, MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap, - SignatureIndex, TableInitializer, TrapCode, VMOffsets, WasmType, WASM_PAGE_SIZE, + DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, FunctionInfo, GlobalInit, + MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap, SignatureIndex, + TableInitializer, TrapCode, WasmType, WASM_PAGE_SIZE, }; #[cfg(feature = "pooling-allocator")] mod pooling; +#[cfg(feature = "memfd-allocator")] +mod memfd; +#[cfg(feature = "memfd-allocator")] +pub use self::memfd::MemFdSlot; + +#[cfg(not(feature = "memfd-allocator"))] +mod memfd_disabled; +#[cfg(not(feature = "memfd-allocator"))] +pub use self::memfd_disabled::MemFdSlot; + #[cfg(feature = "pooling-allocator")] pub use self::pooling::{ InstanceLimits, ModuleLimits, PoolingAllocationStrategy, 
PoolingInstanceAllocator, @@ -39,6 +48,9 @@ pub struct InstanceAllocationRequest<'a> { /// The base address of where JIT functions are located. pub image_base: usize, + /// If using MemFD-based memories, the backing MemFDs. + pub memfds: Option>, + /// Descriptors about each compiled function, such as the offset from /// `image_base`. pub functions: &'a PrimaryMap, @@ -376,9 +388,23 @@ fn check_memory_init_bounds( fn initialize_memories( instance: &mut Instance, + module: &Module, initializers: &[MemoryInitializer], ) -> Result<(), InstantiationError> { for init in initializers { + // Check whether this is a MemFD memory; if so, we can skip + // all initializers. + let memory = init.memory_index; + if let Some(defined_index) = module.defined_memory_index(memory) { + // We can only skip if there is actually a MemFD image. In + // some situations the MemFD image creation code will bail + // (e.g. due to an out of bounds data segment) and so we + // need to fall back on the usual initialization below. + if instance.memories[defined_index].is_memfd_with_image() { + continue; + } + } + instance .memory_init_segment( init.memory_index, @@ -432,6 +458,14 @@ fn initialize_instance( match &module.memory_initialization { MemoryInitialization::Paged { map, out_of_bounds } => { for (index, pages) in map { + // We can only skip if there is actually a MemFD image. In + // some situations the MemFD image creation code will bail + // (e.g. due to an out of bounds data segment) and so we + // need to fall back on the usual initialization below. + if instance.memories[index].is_memfd_with_image() { + continue; + } + let memory = instance.memory(index); let slice = unsafe { slice::from_raw_parts_mut(memory.base, memory.current_length) }; @@ -453,7 +487,7 @@ fn initialize_instance( } } MemoryInitialization::Segmented(initializers) => { - initialize_memories(instance, initializers)?; + initialize_memories(instance, module, initializers)?; } } @@ -691,19 +725,8 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator { let host_state = std::mem::replace(&mut req.host_state, Box::new(())); let mut handle = { - let instance = Instance { - module: req.module.clone(), - offsets: VMOffsets::new(HostPtr, &req.module), - memories, - tables, - dropped_elements: EntitySet::with_capacity(req.module.passive_elements.len()), - dropped_data: EntitySet::with_capacity(req.module.passive_data_map.len()), - host_state, - wasm_data: &*req.wasm_data, - vmctx: VMContext { - _marker: marker::PhantomPinned, - }, - }; + let instance = + Instance::create_raw(&req.module, &*req.wasm_data, memories, tables, host_state); let layout = instance.alloc_layout(); let instance_ptr = alloc::alloc(layout) as *mut Instance; if instance_ptr.is_null() { diff --git a/crates/runtime/src/instance/allocator/memfd.rs b/crates/runtime/src/instance/allocator/memfd.rs new file mode 100644 index 0000000000..8713794824 --- /dev/null +++ b/crates/runtime/src/instance/allocator/memfd.rs @@ -0,0 +1,290 @@ +//! memfd mapping logic for use by the pooling allocator. + +use crate::memfd::MemoryMemFd; +use crate::InstantiationError; +use anyhow::Result; +use libc::c_void; +use rustix::fd::AsRawFd; +use std::convert::TryFrom; +use std::fs::File; +use std::sync::Arc; + +/// A single slot handled by the memfd instance-heap mechanism. 
+/// +/// The mmap scheme is: +/// +/// base ==> (points here) +/// - (image.offset bytes) anonymous zero memory, pre-image +/// - (image.len bytes) CoW mapping of memfd heap image +/// - (up to extension_offset) anonymous zero memory, post-image +/// - (up to static_size) heap expansion region; CoW mapping of per-slot memfd +/// +/// The ordering of mmaps to set this up is: +/// +/// - once, when pooling allocator is created: +/// - one large mmap to create 8GiB * instances * memories slots +/// +/// - per instantiation of new image in a slot: +/// - mmap of anonymous zero memory, from 0 to initial heap size +/// - mmap of CoW'd memfd image, from `image.offset` to +/// `image.offset + image.len`. This overwrites part of the +/// anonymous zero memory, potentially splitting it into a pre- +/// and post-region. +/// - mmap of CoW'd extension file, past the initial heap size up to +/// the end of the max memory size (just before the +/// post-guard). This is always adjacent to the above mmaps, but +/// does not overlap/overwrite them. +#[derive(Debug)] +pub struct MemFdSlot { + /// The base of the actual heap memory. Bytes at this address are + /// what is seen by the Wasm guest code. + base: usize, + /// The maximum static memory size, plus post-guard. + static_size: usize, + /// The memfd image that backs this memory. May be `None`, in + /// which case the memory is all zeroes. + pub(crate) image: Option>, + /// The offset at which the "extension file", which is used to + /// allow for efficient heap growth, is mapped. This is always + /// immediately after the end of the initial memory size. + extension_offset: usize, + /// The anonymous memfd, owned by this slot, which we mmap in the + /// area where the heap may grow during runtime. We use the + /// ftruncate() syscall (invoked via `File::set_len()`) to set its + /// size. We never write any data to it -- we CoW-map it so we can + /// throw away dirty data on termination. Instead, we just use its + /// size as a "watermark" that delineates the boundary between + /// safe-to-access memory and SIGBUS-causing memory. (This works + /// because one can mmap a file beyond its end, and is good + /// because ftruncate does not take the process-wide lock that + /// mmap and mprotect do.) + extension_file: File, + /// Whether this slot may have "dirty" pages (pages written by an + /// instantiation). Set by `instantiate()` and cleared by + /// `clear_and_remain_ready()`, and used in assertions to ensure + /// those methods are called properly. + dirty: bool, +} + +impl MemFdSlot { + pub(crate) fn create( + base_addr: *mut c_void, + static_size: usize, + ) -> Result { + let base = base_addr as usize; + + // Create a MemFD for the memory growth first -- this covers + // extended heap beyond the initial image. + let extension_memfd = memfd::MemfdOptions::new() + .allow_sealing(true) + .create("wasm-anonymous-heap") + .map_err(|e| InstantiationError::Resource(e.into()))?; + // Seal the ability to write the extension file (make it + // permanently read-only). This is a defense-in-depth + // mitigation to make extra-sure that we don't leak + // information between instantiations. See note in `memfd.rs` + // for more about why we use seals. 
+ extension_memfd + .add_seal(memfd::FileSeal::SealWrite) + .map_err(|e| InstantiationError::Resource(e.into()))?; + extension_memfd + .add_seal(memfd::FileSeal::SealSeal) + .map_err(|e| InstantiationError::Resource(e.into()))?; + let extension_file = extension_memfd.into_file(); + extension_file + .set_len(0) + .map_err(|e| InstantiationError::Resource(e.into()))?; + + Ok(MemFdSlot { + base, + static_size, + image: None, + extension_file, + extension_offset: 0, + dirty: false, + }) + } + + pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> { + assert!(size_bytes >= self.extension_offset); + // This is all that is needed to make the new memory + // accessible; we don't need to mprotect anything. (The + // mapping itself is always R+W for the max possible heap + // size, and only the anonymous-backing file length catches + // out-of-bounds accesses.) + self.extension_file + .set_len(u64::try_from(size_bytes - self.extension_offset).unwrap())?; + Ok(()) + } + + pub(crate) fn instantiate( + &mut self, + initial_size_bytes: usize, + maybe_image: Option<&Arc>, + ) -> Result<(), InstantiationError> { + assert!(!self.dirty); + + if let Some(existing_image) = &self.image { + // Fast-path: previously instantiated with the same image, + // so the mappings are already correct; there is no need + // to mmap anything. Given that we asserted not-dirty + // above, any dirty pages will have already been thrown + // away by madvise() during the previous termination. + if let Some(image) = maybe_image { + if existing_image.fd.as_file().as_raw_fd() == image.fd.as_file().as_raw_fd() { + self.dirty = true; + return Ok(()); + } + } + } + + // Otherwise, we need to redo (i) the anonymous-mmap backing + // for the initial heap size, (ii) the extension-file backing, + // and (iii) the initial-heap-image mapping if present. + + // Security/audit note: we map all of these MAP_PRIVATE, so + // all instance data is local to the mapping, not propagated + // to the backing fd. We throw away this CoW overlay with + // madvise() below, from base up to extension_offset (which is + // at least initial_size_bytes, and extended when the + // extension file is, so it covers all three mappings) when + // terminating the instance. + + // Anonymous mapping behind the initial heap size: this gives + // zeroes for any "holes" in the initial heap image. Anonymous + // mmap memory is faster to fault in than a CoW of a file, + // even a file with zero holes, because the kernel's CoW path + // unconditionally copies *something* (even if just a page of + // zeroes). Anonymous zero pages are fast: the kernel + // pre-zeroes them, and even if it runs out of those, a memset + // is half as expensive as a memcpy (only writes, no reads). + if initial_size_bytes > 0 { + unsafe { + let ptr = rustix::io::mmap_anonymous( + self.base as *mut c_void, + initial_size_bytes, + rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE, + rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED, + ) + .map_err(|e| InstantiationError::Resource(e.into()))?; + assert_eq!(ptr as usize, self.base); + } + } + + // An "extension file": this allows us to grow the heap by + // doing just an ftruncate(), without changing any + // mappings. This is important to avoid the process-wide mmap + // lock on Linux. 
+ self.extension_offset = initial_size_bytes; + let extension_map_len = self.static_size - initial_size_bytes; + if extension_map_len > 0 { + unsafe { + let fd = rustix::fd::BorrowedFd::borrow_raw_fd(self.extension_file.as_raw_fd()); + let ptr = rustix::io::mmap( + (self.base + initial_size_bytes) as *mut c_void, + extension_map_len, + rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE, + rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED, + &fd, + 0, + ) + .map_err(|e| InstantiationError::Resource(e.into()))?; + assert_eq!(ptr as usize, self.base + initial_size_bytes); + } + } + + // Finally, the initial memory image. + if let Some(image) = maybe_image { + if image.len > 0 { + let image = image.clone(); + + unsafe { + let fd = rustix::fd::BorrowedFd::borrow_raw_fd(image.fd.as_file().as_raw_fd()); + let ptr = rustix::io::mmap( + (self.base + image.offset) as *mut c_void, + image.len, + rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE, + rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED, + &fd, + image.offset as u64, + ) + .map_err(|e| InstantiationError::Resource(e.into()))?; + assert_eq!(ptr as usize, self.base + image.offset); + } + + self.image = Some(image); + } + } + + self.dirty = true; + Ok(()) + } + + pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> { + assert!(self.dirty); + // madvise the image range; that's it! This will throw away + // dirty pages, which are CoW-private pages on top of the + // initial heap image memfd. + unsafe { + rustix::io::madvise( + self.base as *mut c_void, + self.extension_offset, + rustix::io::Advice::LinuxDontNeed, + )?; + } + + // truncate the extension file down to zero bytes to reset heap length. + self.extension_file + .set_len(0) + .map_err(|e| InstantiationError::Resource(e.into()))?; + self.dirty = false; + Ok(()) + } + + pub(crate) fn has_image(&self) -> bool { + self.image.is_some() + } + + pub(crate) fn is_dirty(&self) -> bool { + self.dirty + } +} + +#[cfg(feature = "memfd-allocator")] +impl Drop for MemFdSlot { + fn drop(&mut self) { + // The MemFdSlot may be dropped if there is an error during + // instantiation: for example, if a memory-growth limiter + // disallows a guest from having a memory of a certain size, + // after we've already initialized the MemFdSlot. + // + // We need to return this region of the large pool mmap to a + // safe state (with no module-specific mappings). The + // MemFdSlot will not be returned to the MemoryPool, so a new + // MemFdSlot will be created and overwrite the mappings anyway + // on the slot's next use; but for safety and to avoid + // resource leaks it's better not to have stale mappings to a + // possibly-otherwise-dead module's image. + // + // To "wipe the slate clean", let's do a mmap of anonymous + // memory over the whole region, with PROT_NONE. Note that we + // *can't* simply munmap, because that leaves a hole in the + // middle of the pooling allocator's big memory area that some + // other random mmap may swoop in and take, to be trampled + // over by the next MemFdSlot later. + // + // Since we're in drop(), we can't sanely return an error if + // this mmap fails. Let's ignore the failure if so; the next + // MemFdSlot to be created for this slot will try to overwrite + // the existing stale mappings, and return a failure properly + // if we still cannot map new memory. 
+ unsafe { + let _ = rustix::io::mmap_anonymous( + self.base as *mut _, + self.static_size, + rustix::io::ProtFlags::empty(), + rustix::io::MapFlags::FIXED | rustix::io::MapFlags::NORESERVE, + ); + } + } +} diff --git a/crates/runtime/src/instance/allocator/memfd_disabled.rs b/crates/runtime/src/instance/allocator/memfd_disabled.rs new file mode 100644 index 0000000000..9c87591bd5 --- /dev/null +++ b/crates/runtime/src/instance/allocator/memfd_disabled.rs @@ -0,0 +1,49 @@ +//! Shims for MemFdSlot when the memfd allocator is not +//! included. Enables unconditional use of the type and its methods +//! throughout higher-level code. + +use crate::InstantiationError; +use anyhow::Result; +use std::sync::Arc; + +/// A placeholder for MemFdSlot when we have not included the pooling +/// allocator. +/// +/// To allow MemFdSlot to be unconditionally passed around in various +/// places (e.g. a `Memory`), we define a zero-sized type when memfd is +/// not included in the build. +#[cfg(not(feature = "memfd-allocator"))] +#[derive(Debug)] +pub struct MemFdSlot; + +#[cfg(not(feature = "memfd-allocator"))] +#[allow(dead_code)] +impl MemFdSlot { + pub(crate) fn create(_: *mut libc::c_void, _: usize) -> Result { + panic!("create() on invalid MemFdSlot"); + } + + pub(crate) fn instantiate( + &mut self, + _: usize, + _: Option<&Arc>, + ) -> Result { + panic!("instantiate() on invalid MemFdSlot"); + } + + pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> { + Ok(()) + } + + pub(crate) fn has_image(&self) -> bool { + false + } + + pub(crate) fn is_dirty(&self) -> bool { + false + } + + pub(crate) fn set_heap_limit(&mut self, _: usize) -> Result<()> { + panic!("set_heap_limit on invalid MemFdSlot"); + } +} diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index 76614137d5..6aa291d7a9 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -7,19 +7,21 @@ //! Using the pooling instance allocator can speed up module instantiation //! when modules can be constrained based on configurable limits. 
+use super::MemFdSlot; use super::{ initialize_instance, initialize_vmcontext, InstanceAllocationRequest, InstanceAllocator, InstanceHandle, InstantiationError, }; -use crate::{instance::Instance, Memory, Mmap, Table, VMContext}; +use crate::{instance::Instance, Memory, Mmap, ModuleMemFds, Table}; use anyhow::{anyhow, bail, Context, Result}; +use libc::c_void; use rand::Rng; use std::convert::TryFrom; -use std::marker; use std::mem; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; +use std::sync::Mutex; use wasmtime_environ::{ - EntitySet, HostPtr, MemoryStyle, Module, PrimaryMap, Tunables, VMOffsets, VMOffsetsFields, + HostPtr, MemoryIndex, MemoryStyle, Module, PrimaryMap, Tunables, VMOffsets, VMOffsetsFields, WASM_PAGE_SIZE, }; @@ -284,7 +286,6 @@ struct InstancePool { free_list: Mutex>, memories: MemoryPool, tables: TablePool, - empty_module: Arc, } impl InstancePool { @@ -332,14 +333,8 @@ impl InstancePool { free_list: Mutex::new((0..max_instances).collect()), memories: MemoryPool::new(module_limits, instance_limits, tunables)?, tables: TablePool::new(module_limits, instance_limits)?, - empty_module: Arc::new(Module::default()), }; - // Use a default module to initialize the instances to start - for i in 0..instance_limits.count as usize { - pool.initialize(module_limits, i); - } - Ok(pool) } @@ -348,41 +343,26 @@ impl InstancePool { &mut *(self.mapping.as_mut_ptr().add(index * self.instance_size) as *mut Instance) } - fn initialize(&self, limits: &ModuleLimits, index: usize) { - unsafe { - let instance = self.instance(index); - - // Write a default instance with preallocated memory/table map storage to the ptr - std::ptr::write( - instance as _, - Instance { - module: self.empty_module.clone(), - offsets: VMOffsets::new(HostPtr, &self.empty_module), - memories: PrimaryMap::with_capacity(limits.memories as usize), - tables: PrimaryMap::with_capacity(limits.tables as usize), - dropped_elements: EntitySet::new(), - dropped_data: EntitySet::new(), - host_state: Box::new(()), - wasm_data: &[], - vmctx: VMContext { - _marker: marker::PhantomPinned, - }, - }, - ); - } - } - unsafe fn setup_instance( &self, index: usize, mut req: InstanceAllocationRequest, ) -> Result { - let instance = self.instance(index); + let host_state = std::mem::replace(&mut req.host_state, Box::new(())); + let instance_data = Instance::create_raw( + &req.module, + &*req.wasm_data, + PrimaryMap::default(), + PrimaryMap::default(), + host_state, + ); - instance.module = req.module.clone(); - instance.offsets = VMOffsets::new(HostPtr, instance.module.as_ref()); - instance.host_state = std::mem::replace(&mut req.host_state, Box::new(())); - instance.wasm_data = &*req.wasm_data; + // Instances are uninitialized memory at first; we need to + // write an empty but initialized `Instance` struct into the + // chosen slot before we do anything else with it. (This is + // paired with a `drop_in_place` in deallocate below.) + let instance = self.instance(index); + std::ptr::write(instance as _, instance_data); // set_instance_memories and _tables will need the store before we can completely // initialize the vmcontext. 
@@ -391,8 +371,10 @@ impl InstancePool { } Self::set_instance_memories( + index, instance, - self.memories.get(index), + &self.memories, + &req.memfds, self.memories.max_wasm_pages, )?; @@ -448,20 +430,44 @@ impl InstancePool { let instance = unsafe { &mut *handle.instance }; // Decommit any linear memories that were used - for (memory, base) in instance.memories.values_mut().zip(self.memories.get(index)) { + for ((def_mem_idx, memory), base) in + instance.memories.iter_mut().zip(self.memories.get(index)) + { let mut memory = mem::take(memory); debug_assert!(memory.is_static()); - // Reset any faulted guard pages as the physical memory may be reused for another instance in the future - #[cfg(all(feature = "uffd", target_os = "linux"))] - memory - .reset_guard_pages() - .expect("failed to reset guard pages"); - drop(&mut memory); // require mutable on all platforms, not just uffd + match memory { + Memory::Static { + memfd_slot: Some(mut memfd_slot), + .. + } => { + let mem_idx = instance.module.memory_index(def_mem_idx); + // If there was any error clearing the memfd, just + // drop it here, and let the drop handler for the + // MemFdSlot unmap in a way that retains the + // address space reservation. + if memfd_slot.clear_and_remain_ready().is_ok() { + self.memories.return_memfd_slot(index, mem_idx, memfd_slot); + } + } - let size = memory.byte_size(); - drop(memory); - decommit_memory_pages(base, size).expect("failed to decommit linear memory pages"); + _ => { + // Reset any faulted guard pages as the physical + // memory may be reused for another instance in + // the future. + #[cfg(all(feature = "uffd", target_os = "linux"))] + memory + .reset_guard_pages() + .expect("failed to reset guard pages"); + // require mutable on all platforms, not just uffd + drop(&mut memory); + + let size = memory.byte_size(); + drop(memory); + decommit_memory_pages(base, size) + .expect("failed to decommit linear memory pages"); + } + } } instance.memories.clear(); @@ -481,50 +487,81 @@ impl InstancePool { decommit_table_pages(base, size).expect("failed to decommit table pages"); } - instance.tables.clear(); - instance.dropped_elements.clear(); - - // Drop all `global` values which need a destructor, such as externref - // values which now need their reference count dropped. - instance.drop_globals(); - - // Drop any host state - instance.host_state = Box::new(()); - - // And finally reset the module/offsets back to their original. This - // should put everything back in a relatively pristine state for each - // fresh allocation later on. - instance.module = self.empty_module.clone(); - instance.offsets = VMOffsets::new(HostPtr, &self.empty_module); - instance.wasm_data = &[]; + // We've now done all of the pooling-allocator-specific + // teardown, so we can drop the Instance and let destructors + // take care of any other fields (host state, globals, etc.). + unsafe { + std::ptr::drop_in_place(instance as *mut _); + } + // The instance is now uninitialized memory and cannot be + // touched again until we write a fresh Instance in-place with + // std::ptr::write in allocate() above. 
self.free_list.lock().unwrap().push(index); } fn set_instance_memories( + instance_idx: usize, instance: &mut Instance, - mut memories: impl Iterator, + memories: &MemoryPool, + maybe_memfds: &Option>, max_pages: u64, ) -> Result<(), InstantiationError> { let module = instance.module.as_ref(); debug_assert!(instance.memories.is_empty()); - for plan in - (&module.memory_plans.values().as_slice()[module.num_imported_memories..]).iter() + for (memory_index, plan) in module + .memory_plans + .iter() + .skip(module.num_imported_memories) { + let defined_index = module + .defined_memory_index(memory_index) + .expect("should be a defined memory since we skipped imported ones"); + let memory = unsafe { std::slice::from_raw_parts_mut( - memories.next().unwrap(), + memories.get_base(instance_idx, memory_index), (max_pages as usize) * (WASM_PAGE_SIZE as usize), ) }; - instance.memories.push( - Memory::new_static(plan, memory, commit_memory_pages, unsafe { - &mut *instance.store() - }) - .map_err(InstantiationError::Resource)?, - ); + + if let Some(memfds) = maybe_memfds { + let image = memfds.get_memory_image(defined_index); + let mut slot = memories.take_memfd_slot(instance_idx, memory_index)?; + let initial_size = plan.memory.minimum * WASM_PAGE_SIZE as u64; + + // If instantiation fails, we can propagate the error + // upward and drop the slot. This will cause the Drop + // handler to attempt to map the range with PROT_NONE + // memory, to reserve the space while releasing any + // stale mappings. The next use of this slot will then + // create a new MemFdSlot that will try to map over + // this, returning errors as well if the mapping + // errors persist. The unmap-on-drop is best effort; + // if it fails, then we can still soundly continue + // using the rest of the pool and allowing the rest of + // the process to continue, because we never perform a + // mmap that would leave an open space for someone + // else to come in and map something. + slot.instantiate(initial_size as usize, image) + .map_err(|e| InstantiationError::Resource(e.into()))?; + + instance.memories.push( + Memory::new_static(plan, memory, None, Some(slot), unsafe { + &mut *instance.store() + }) + .map_err(InstantiationError::Resource)?, + ); + } else { + instance.memories.push( + Memory::new_static(plan, memory, Some(commit_memory_pages), None, unsafe { + &mut *instance.store() + }) + .map_err(InstantiationError::Resource)?, + ); + } } debug_assert!(instance.dropped_data.is_empty()); @@ -566,17 +603,6 @@ impl InstancePool { } } -impl Drop for InstancePool { - fn drop(&mut self) { - unsafe { - for i in 0..self.max_instances { - let ptr = self.mapping.as_mut_ptr().add(i * self.instance_size) as *mut Instance; - std::ptr::drop_in_place(ptr); - } - } - } -} - /// Represents a pool of WebAssembly linear memories. /// /// A linear memory is divided into accessible pages and guard pages. @@ -589,6 +615,10 @@ impl Drop for InstancePool { #[derive(Debug)] struct MemoryPool { mapping: Mmap, + // If using the memfd allocation scheme, the MemFd slots. We + // dynamically transfer ownership of a slot to a Memory when in + // use. + memfd_slots: Vec>>, // The size, in bytes, of each linear memory's reservation plus the guard // region allocated for it. 
memory_size: usize, @@ -673,8 +703,18 @@ impl MemoryPool { let mapping = Mmap::accessible_reserved(0, allocation_size) .context("failed to create memory pool mapping")?; + let num_memfd_slots = if cfg!(feature = "memfd-allocator") { + max_instances * max_memories + } else { + 0 + }; + let memfd_slots: Vec<_> = std::iter::repeat_with(|| Mutex::new(None)) + .take(num_memfd_slots) + .collect(); + let pool = Self { mapping, + memfd_slots, memory_size, initial_memory_offset, max_memories, @@ -689,17 +729,43 @@ impl MemoryPool { Ok(pool) } - fn get(&self, instance_index: usize) -> impl Iterator { + fn get_base(&self, instance_index: usize, memory_index: MemoryIndex) -> *mut u8 { debug_assert!(instance_index < self.max_instances); + let memory_index = memory_index.as_u32() as usize; + debug_assert!(memory_index < self.max_memories); + let idx = instance_index * self.max_memories + memory_index; + let offset = self.initial_memory_offset + idx * self.memory_size; + unsafe { self.mapping.as_mut_ptr().offset(offset as isize) } + } - let base: *mut u8 = unsafe { - self.mapping.as_mut_ptr().add( - self.initial_memory_offset + instance_index * self.memory_size * self.max_memories, - ) as _ - }; + fn get<'a>(&'a self, instance_index: usize) -> impl Iterator + 'a { + (0..self.max_memories) + .map(move |i| self.get_base(instance_index, MemoryIndex::from_u32(i as u32))) + } - let size = self.memory_size; - (0..self.max_memories).map(move |i| unsafe { base.add(i * size) }) + /// Take ownership of the given memfd slot. Must be returned via + /// `return_memfd_slot` when the instance is done using it. + fn take_memfd_slot( + &self, + instance_index: usize, + memory_index: MemoryIndex, + ) -> Result { + let idx = instance_index * self.max_memories + (memory_index.as_u32() as usize); + let maybe_slot = self.memfd_slots[idx].lock().unwrap().take(); + + maybe_slot.map(|slot| Ok(slot)).unwrap_or_else(|| { + MemFdSlot::create( + self.get_base(instance_index, memory_index) as *mut c_void, + self.memory_size, + ) + }) + } + + /// Return ownership of the given memfd slot. 
+ fn return_memfd_slot(&self, instance_index: usize, memory_index: MemoryIndex, slot: MemFdSlot) { + assert!(!slot.is_dirty()); + let idx = instance_index * self.max_memories + (memory_index.as_u32() as usize); + *self.memfd_slots[idx].lock().unwrap() = Some(slot); } } @@ -1413,6 +1479,7 @@ mod test { host_state: Box::new(()), store: StorePtr::empty(), wasm_data: &[], + memfds: None, }, ) .expect("allocation should succeed"), @@ -1437,6 +1504,7 @@ mod test { host_state: Box::new(()), store: StorePtr::empty(), wasm_data: &[], + memfds: None, }, ) { Err(InstantiationError::Limit(3)) => {} diff --git a/crates/runtime/src/instance/allocator/pooling/uffd.rs b/crates/runtime/src/instance/allocator/pooling/uffd.rs index 55b4479fd1..87dd9a0c57 100644 --- a/crates/runtime/src/instance/allocator/pooling/uffd.rs +++ b/crates/runtime/src/instance/allocator/pooling/uffd.rs @@ -577,6 +577,7 @@ mod test { PoolingAllocationStrategy::Random, InstanceAllocationRequest { module: module.clone(), + memfds: None, image_base: 0, functions, imports: Imports { diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index f96e7d8dda..806c8c9c5c 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -19,6 +19,7 @@ clippy::use_self ) )] +#![cfg_attr(feature = "memfd-allocator", allow(dead_code))] use std::sync::atomic::AtomicU64; @@ -63,6 +64,49 @@ pub use crate::vmcontext::{ VMSharedSignatureIndex, VMTableDefinition, VMTableImport, VMTrampoline, ValRaw, }; +mod module_id; +pub use module_id::{CompiledModuleId, CompiledModuleIdAllocator}; + +#[cfg(feature = "memfd-allocator")] +mod memfd; + +/// When memfd support is not included, provide a shim type and +/// constructor instead so that higher-level code does not need +/// feature-conditional compilation. +#[cfg(not(feature = "memfd-allocator"))] +#[allow(dead_code)] +mod memfd { + use anyhow::Result; + use std::sync::Arc; + use wasmtime_environ::{DefinedMemoryIndex, Module}; + + /// A shim for the memfd image container when memfd support is not + /// included. + pub enum ModuleMemFds {} + + /// A shim for an individual memory image. + #[allow(dead_code)] + pub enum MemoryMemFd {} + + impl ModuleMemFds { + /// Construct a new set of memfd images. This variant is used + /// when memfd support is not included; it always returns no + /// images. + pub fn new(_: &Module, _: &[u8]) -> Result>> { + Ok(None) + } + + /// Get the memfd image for a particular memory. + pub(crate) fn get_memory_image(&self, _: DefinedMemoryIndex) -> Option<&Arc> { + // Should be unreachable because the `Self` type is + // uninhabitable. + match *self {} + } + } +} + +pub use crate::memfd::ModuleMemFds; + /// Version number of this crate. pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/crates/runtime/src/memfd.rs b/crates/runtime/src/memfd.rs new file mode 100644 index 0000000000..46ebc4e228 --- /dev/null +++ b/crates/runtime/src/memfd.rs @@ -0,0 +1,236 @@ +//! memfd support. + +use anyhow::Result; +use memfd::{Memfd, MemfdOptions}; +use rustix::fs::FileExt; +use std::convert::TryFrom; +use std::sync::Arc; +use wasmtime_environ::{ + DefinedMemoryIndex, MemoryInitialization, MemoryInitializer, MemoryPlan, Module, PrimaryMap, +}; + +/// MemFDs containing backing images for certain memories in a module. +/// +/// This is meant to be built once, when a module is first +/// loaded/constructed, and then used many times for instantiation. 
+pub struct ModuleMemFds { + memories: PrimaryMap>>, +} + +const MAX_MEMFD_IMAGE_SIZE: u64 = 1024 * 1024 * 1024; // limit to 1GiB. + +impl ModuleMemFds { + pub(crate) fn get_memory_image( + &self, + defined_index: DefinedMemoryIndex, + ) -> Option<&Arc> { + self.memories[defined_index].as_ref() + } +} + +/// One backing image for one memory. +#[derive(Debug)] +pub(crate) struct MemoryMemFd { + pub(crate) fd: Memfd, + /// Length of image. Note that initial memory size may be larger; + /// leading and trailing zeroes are truncated (handled by + /// anonymous backing memfd). + pub(crate) len: usize, + /// Image starts this many bytes into heap space. Note that the + /// memfd's offsets are always equal to the heap offsets, so we + /// map at an offset into the fd as well. (This simplifies + /// construction.) + pub(crate) offset: usize, +} + +fn unsupported_initializer(segment: &MemoryInitializer, plan: &MemoryPlan) -> bool { + // If the segment has a base that is dynamically determined + // (by a global value, which may be a function of an imported + // module, for example), then we cannot build a single static + // image that is used for every instantiation. So we skip this + // memory entirely. + let end = match segment.end() { + None => { + return true; + } + Some(end) => end, + }; + + // Cannot be out-of-bounds. If there is a *possibility* it may + // be, then we just fall back on ordinary initialization. + if plan.initializer_possibly_out_of_bounds(segment) { + return true; + } + + // Must fit in our max size. + if end > MAX_MEMFD_IMAGE_SIZE { + return true; + } + + false +} + +impl ModuleMemFds { + /// Create a new `ModuleMemFds` for the given module. This can be + /// passed in as part of a `InstanceAllocationRequest` to speed up + /// instantiation and execution by using memfd-backed memories. + pub fn new(module: &Module, wasm_data: &[u8]) -> Result>> { + let page_size = region::page::size() as u64; + let num_defined_memories = module.memory_plans.len() - module.num_imported_memories; + + // Allocate a memfd file initially for every memory. We'll + // release those and set `excluded_memories` for those that we + // determine during initializer processing we cannot support a + // static image (e.g. due to dynamically-located segments). + let mut memfds: PrimaryMap> = PrimaryMap::default(); + let mut sizes: PrimaryMap = PrimaryMap::default(); + let mut excluded_memories: PrimaryMap = PrimaryMap::new(); + + for _ in 0..num_defined_memories { + memfds.push(None); + sizes.push(0); + excluded_memories.push(false); + } + + fn create_memfd() -> Result { + // Create the memfd. It needs a name, but the + // documentation for `memfd_create()` says that names can + // be duplicated with no issues. 
+ MemfdOptions::new() + .allow_sealing(true) + .create("wasm-memory-image") + .map_err(|e| e.into()) + } + let round_up_page = |len: u64| (len + page_size - 1) & !(page_size - 1); + + match &module.memory_initialization { + &MemoryInitialization::Segmented(ref segments) => { + for (i, segment) in segments.iter().enumerate() { + let defined_memory = match module.defined_memory_index(segment.memory_index) { + Some(defined_memory) => defined_memory, + None => continue, + }; + if excluded_memories[defined_memory] { + continue; + } + + if unsupported_initializer(segment, &module.memory_plans[segment.memory_index]) + { + memfds[defined_memory] = None; + excluded_memories[defined_memory] = true; + continue; + } + + if memfds[defined_memory].is_none() { + memfds[defined_memory] = Some(create_memfd()?); + } + let memfd = memfds[defined_memory].as_mut().unwrap(); + + let end = round_up_page(segment.end().expect("must have statically-known end")); + if end > sizes[defined_memory] { + sizes[defined_memory] = end; + memfd.as_file().set_len(end)?; + } + + let base = segments[i].offset; + let data = &wasm_data[segment.data.start as usize..segment.data.end as usize]; + memfd.as_file().write_at(data, base)?; + } + } + &MemoryInitialization::Paged { ref map, .. } => { + for (defined_memory, pages) in map { + let top = pages + .iter() + .map(|(base, range)| *base + range.len() as u64) + .max() + .unwrap_or(0); + + let memfd = create_memfd()?; + memfd.as_file().set_len(top)?; + + for (base, range) in pages { + let data = &wasm_data[range.start as usize..range.end as usize]; + memfd.as_file().write_at(data, *base)?; + } + + memfds[defined_memory] = Some(memfd); + sizes[defined_memory] = top; + } + } + } + + // Now finalize each memory. + let mut memories: PrimaryMap>> = + PrimaryMap::default(); + for (defined_memory, maybe_memfd) in memfds { + let memfd = match maybe_memfd { + Some(memfd) => memfd, + None => { + memories.push(None); + continue; + } + }; + let size = sizes[defined_memory]; + + // Find leading and trailing zero data so that the mmap + // can precisely map only the nonzero data; anon-mmap zero + // memory is faster for anything that doesn't actually + // have content. + let mut page_data = vec![0; page_size as usize]; + let mut page_is_nonzero = |page| { + let offset = page_size * page; + memfd.as_file().read_at(&mut page_data[..], offset).unwrap(); + page_data.iter().any(|byte| *byte != 0) + }; + let n_pages = size / page_size; + + let mut offset = 0; + for page in 0..n_pages { + if page_is_nonzero(page) { + break; + } + offset += page_size; + } + let len = if offset == size { + 0 + } else { + let mut len = 0; + for page in (0..n_pages).rev() { + if page_is_nonzero(page) { + len = (page + 1) * page_size - offset; + break; + } + } + len + }; + + // Seal the memfd's data and length. + // + // This is a defense-in-depth security mitigation. The + // memfd will serve as the starting point for the heap of + // every instance of this module. If anything were to + // write to this, it could affect every execution. The + // memfd object itself is owned by the machinery here and + // not exposed elsewhere, but it is still an ambient open + // file descriptor at the syscall level, so some other + // vulnerability that allowed writes to arbitrary fds + // could modify it. Or we could have some issue with the + // way that we map it into each instance. 
To be + // extra-super-sure that it never changes, and because + // this costs very little, we use the kernel's "seal" API + // to make the memfd image permanently read-only. + memfd.add_seal(memfd::FileSeal::SealGrow)?; + memfd.add_seal(memfd::FileSeal::SealShrink)?; + memfd.add_seal(memfd::FileSeal::SealWrite)?; + memfd.add_seal(memfd::FileSeal::SealSeal)?; + + memories.push(Some(Arc::new(MemoryMemFd { + fd: memfd, + offset: usize::try_from(offset).unwrap(), + len: usize::try_from(len).unwrap(), + }))); + } + + Ok(Some(Arc::new(ModuleMemFds { memories }))) + } +} diff --git a/crates/runtime/src/memory.rs b/crates/runtime/src/memory.rs index 07c0c619cc..894a8afd96 100644 --- a/crates/runtime/src/memory.rs +++ b/crates/runtime/src/memory.rs @@ -2,6 +2,7 @@ //! //! `RuntimeLinearMemory` is to WebAssembly linear memories what `Table` is to WebAssembly tables. +use crate::instance::MemFdSlot; use crate::mmap::Mmap; use crate::vmcontext::VMMemoryDefinition; use crate::Store; @@ -208,7 +209,11 @@ pub enum Memory { /// A callback which makes portions of `base` accessible for when memory /// is grown. Otherwise it's expected that accesses to `base` will /// fault. - make_accessible: fn(*mut u8, usize) -> Result<()>, + make_accessible: Option Result<()>>, + + /// The MemFdSlot, if any, for this memory. Owned here and + /// returned to the pooling allocator when termination occurs. + memfd_slot: Option, /// Stores the pages in the linear memory that have faulted as guard pages when using the `uffd` feature. /// These pages need their protection level reset before the memory can grow. @@ -236,7 +241,8 @@ impl Memory { pub fn new_static( plan: &MemoryPlan, base: &'static mut [u8], - make_accessible: fn(*mut u8, usize) -> Result<()>, + make_accessible: Option Result<()>>, + memfd_slot: Option, store: &mut dyn Store, ) -> Result { let (minimum, maximum) = Self::limit_new(plan, store)?; @@ -246,14 +252,17 @@ impl Memory { _ => base, }; - if minimum > 0 { - make_accessible(base.as_mut_ptr(), minimum)?; + if let Some(make_accessible) = make_accessible { + if minimum > 0 { + make_accessible(base.as_mut_ptr(), minimum)?; + } } Ok(Memory::Static { base, size: minimum, make_accessible, + memfd_slot, #[cfg(all(feature = "uffd", target_os = "linux"))] guard_page_faults: Vec::new(), }) @@ -373,6 +382,22 @@ impl Memory { } } + /// Returns whether or not this memory is backed by a MemFD + /// image. Note that this is testing whether there is actually an + /// *image* mapped, not just whether the MemFD mechanism is being + /// used. The distinction is important because if we are not using + /// a prevalidated and prepared image, we need to fall back to + /// ordinary initialization code. + pub(crate) fn is_memfd_with_image(&self) -> bool { + match self { + Memory::Static { + memfd_slot: Some(ref slot), + .. + } => slot.has_image(), + _ => false, + } + } + /// Grow memory by the specified amount of wasm pages. /// /// Returns `None` if memory can't be grown by the specified amount @@ -443,12 +468,33 @@ impl Memory { } match self { + Memory::Static { + base, + size, + memfd_slot: Some(ref mut memfd_slot), + .. + } => { + // Never exceed static memory size + if new_byte_size > base.len() { + store.memory_grow_failed(&format_err!("static memory size exceeded")); + return Ok(None); + } + + if let Err(e) = memfd_slot.set_heap_limit(new_byte_size) { + store.memory_grow_failed(&e); + return Ok(None); + } + *size = new_byte_size; + } Memory::Static { base, size, make_accessible, .. 
} => { + let make_accessible = make_accessible + .expect("make_accessible must be Some if this is not a MemFD memory"); + // Never exceed static memory size if new_byte_size > base.len() { store.memory_grow_failed(&format_err!("static memory size exceeded")); @@ -540,7 +586,8 @@ impl Default for Memory { Memory::Static { base: &mut [], size: 0, - make_accessible: |_, _| unreachable!(), + make_accessible: Some(|_, _| unreachable!()), + memfd_slot: None, #[cfg(all(feature = "uffd", target_os = "linux"))] guard_page_faults: Vec::new(), } diff --git a/crates/runtime/src/module_id.rs b/crates/runtime/src/module_id.rs new file mode 100644 index 0000000000..481a63e0bd --- /dev/null +++ b/crates/runtime/src/module_id.rs @@ -0,0 +1,28 @@ +//! Unique IDs for modules in the runtime. + +use std::sync::atomic::{AtomicU64, Ordering}; + +/// A unique identifier (within an engine or similar) for a compiled +/// module. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct CompiledModuleId(u64); + +/// An allocator for compiled module IDs. +pub struct CompiledModuleIdAllocator { + next: AtomicU64, +} + +impl CompiledModuleIdAllocator { + /// Create a compiled-module ID allocator. + pub fn new() -> Self { + Self { + next: AtomicU64::new(1), + } + } + + /// Allocate a new ID. + pub fn alloc(&self) -> CompiledModuleId { + let id = self.next.fetch_add(1, Ordering::Relaxed); + CompiledModuleId(id) + } +} diff --git a/crates/runtime/src/traphandlers/unix.rs b/crates/runtime/src/traphandlers/unix.rs index cf41176cb7..fd16bfcdd1 100644 --- a/crates/runtime/src/traphandlers/unix.rs +++ b/crates/runtime/src/traphandlers/unix.rs @@ -51,9 +51,17 @@ pub unsafe fn platform_init() { register(&mut PREV_SIGFPE, libc::SIGFPE); } - // On ARM, handle Unaligned Accesses. - // On Darwin, guard page accesses are raised as SIGBUS. - if cfg!(target_arch = "arm") || cfg!(target_os = "macos") || cfg!(target_os = "freebsd") { + // Sometimes we need to handle SIGBUS too: + // - On ARM, handle Unaligned Accesses. + // - On Darwin, guard page accesses are raised as SIGBUS. + // - With the MemFD allocator, heap growth is controlled by + // ftruncate'ing an mmap'd file, and so out-of-bounds accesses + // are raised as SIGBUS. + if cfg!(target_arch = "arm") + || cfg!(target_os = "macos") + || cfg!(target_os = "freebsd") + || cfg!(feature = "memfd-allocator") + { register(&mut PREV_SIGBUS, libc::SIGBUS); } } diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index b5912ceb83..c7b0037d0e 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -89,3 +89,5 @@ all-arch = ["wasmtime-cranelift/all-arch"] # It is useful for applications that do not bind their own exception ports and # need portable signal handling. posix-signals-on-macos = ["wasmtime-runtime/posix-signals-on-macos"] + +memfd-allocator = ["wasmtime-runtime/memfd-allocator", "pooling-allocator"] \ No newline at end of file diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 8a419c5170..48420ff492 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -7,7 +7,7 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; #[cfg(feature = "cache")] use wasmtime_cache::CacheConfig; -use wasmtime_runtime::{debug_builtins, InstanceAllocator}; +use wasmtime_runtime::{debug_builtins, CompiledModuleIdAllocator, InstanceAllocator}; /// An `Engine` which is a global context for compilation and management of wasm /// modules. 
@@ -43,6 +43,7 @@ struct EngineInner { allocator: Box, signatures: SignatureRegistry, epoch: AtomicU64, + unique_id_allocator: CompiledModuleIdAllocator, } impl Engine { @@ -68,6 +69,7 @@ impl Engine { allocator, signatures: registry, epoch: AtomicU64::new(0), + unique_id_allocator: CompiledModuleIdAllocator::new(), }), }) } @@ -153,6 +155,10 @@ impl Engine { self.inner.epoch.fetch_add(1, Ordering::Relaxed); } + pub(crate) fn unique_id_allocator(&self) -> &CompiledModuleIdAllocator { + &self.inner.unique_id_allocator + } + /// Ahead-of-time (AOT) compiles a WebAssembly module. /// /// The `bytes` provided must be in one of two formats: diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index aec6c1ba06..7f5b5e823d 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -651,7 +651,7 @@ impl<'a> Instantiator<'a> { artifacts, modules, &self.cur.modules, - ); + )?; self.cur.modules.push(submodule); } @@ -707,6 +707,7 @@ impl<'a> Instantiator<'a> { .allocator() .allocate(InstanceAllocationRequest { module: compiled_module.module().clone(), + memfds: self.cur.module.memfds().clone(), image_base: compiled_module.code().as_ptr() as usize, functions: compiled_module.functions(), imports: self.cur.build(), diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 04c695f214..09c2d3f485 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -11,6 +11,7 @@ use std::sync::Arc; use wasmparser::{Parser, ValidPayload, Validator}; use wasmtime_environ::{ModuleEnvironment, ModuleIndex, PrimaryMap}; use wasmtime_jit::{CompiledModule, CompiledModuleInfo, MmapVec, TypeTables}; +use wasmtime_runtime::ModuleMemFds; mod registry; mod serialization; @@ -107,6 +108,8 @@ struct ModuleInner { types: Arc, /// Registered shared signature for the module. signatures: Arc, + /// a set of memfd images for memories, if any. 
+ memfds: Option>, } impl Module { @@ -336,7 +339,12 @@ impl Module { }; let modules = engine.run_maybe_parallel(artifacts, |(a, b)| { - CompiledModule::from_artifacts(a, b, &*engine.config().profiler) + CompiledModule::from_artifacts( + a, + b, + &*engine.config().profiler, + engine.unique_id_allocator(), + ) })?; Self::from_parts(engine, modules, main_module, Arc::new(types), &[]) @@ -523,6 +531,8 @@ impl Module { }) .collect::>>()?; + let memfds = ModuleMemFds::new(module.module(), module.wasm_data())?; + return Ok(Self { inner: Arc::new(ModuleInner { engine: engine.clone(), @@ -531,6 +541,7 @@ impl Module { artifact_upvars: modules, module_upvars, signatures, + memfds, }), }); @@ -543,11 +554,14 @@ impl Module { module_upvars: &[serialization::SerializedModuleUpvar], signatures: &Arc, ) -> Result { + let module = artifacts[module_index].clone(); + let memfds = ModuleMemFds::new(module.module(), module.wasm_data())?; Ok(Module { inner: Arc::new(ModuleInner { engine: engine.clone(), types: types.clone(), - module: artifacts[module_index].clone(), + module, + memfds, artifact_upvars: artifact_upvars .iter() .map(|i| artifacts[*i].clone()) @@ -666,12 +680,15 @@ impl Module { artifact_upvars: &[usize], module_upvars: &[wasmtime_environ::ModuleUpvar], modules: &PrimaryMap, - ) -> Module { - Module { + ) -> Result { + let module = self.inner.artifact_upvars[artifact_index].clone(); + let memfds = ModuleMemFds::new(module.module(), module.wasm_data())?; + Ok(Module { inner: Arc::new(ModuleInner { types: self.inner.types.clone(), engine: self.inner.engine.clone(), - module: self.inner.artifact_upvars[artifact_index].clone(), + module, + memfds, artifact_upvars: artifact_upvars .iter() .map(|i| self.inner.artifact_upvars[*i].clone()) @@ -687,7 +704,7 @@ impl Module { .collect(), signatures: self.inner.signatures.clone(), }), - } + }) } pub(crate) fn compiled_module(&self) -> &Arc { @@ -706,6 +723,10 @@ impl Module { &self.inner.signatures } + pub(crate) fn memfds(&self) -> &Option> { + &self.inner.memfds + } + /// Looks up the module upvar value at the `index` specified. 
/// /// Note that this panics if `index` is out of bounds since this should diff --git a/crates/wasmtime/src/module/serialization.rs b/crates/wasmtime/src/module/serialization.rs index 740d1eab92..cb643d795d 100644 --- a/crates/wasmtime/src/module/serialization.rs +++ b/crates/wasmtime/src/module/serialization.rs @@ -274,7 +274,12 @@ impl<'a> SerializedModule<'a> { pub fn into_module(self, engine: &Engine) -> Result { let (main_module, modules, types, upvars) = self.into_parts(engine)?; let modules = engine.run_maybe_parallel(modules, |(i, m)| { - CompiledModule::from_artifacts(i, m, &*engine.config().profiler) + CompiledModule::from_artifacts( + i, + m, + &*engine.config().profiler, + engine.unique_id_allocator(), + ) })?; Module::from_parts(engine, modules, main_module, Arc::new(types), &upvars) diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index c6d7914e47..362fb59848 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -421,11 +421,13 @@ impl Store { shared_signatures: None.into(), imports: Default::default(), module: Arc::new(wasmtime_environ::Module::default()), + memfds: None, store: StorePtr::empty(), wasm_data: &[], }) .expect("failed to allocate default callee") }; + let mut inner = Box::new(StoreInner { inner: StoreOpaque { _marker: marker::PhantomPinned, diff --git a/crates/wasmtime/src/trampoline.rs b/crates/wasmtime/src/trampoline.rs index c1f8038a5a..790cbf9ef9 100644 --- a/crates/wasmtime/src/trampoline.rs +++ b/crates/wasmtime/src/trampoline.rs @@ -41,6 +41,7 @@ fn create_handle( let handle = OnDemandInstanceAllocator::new(config.mem_creator.clone(), 0).allocate( InstanceAllocationRequest { module: Arc::new(module), + memfds: None, functions, image_base: 0, imports, diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index 67d57fc334..47513f83cf 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -161,6 +161,7 @@ pub unsafe fn create_raw_function( Ok( OnDemandInstanceAllocator::default().allocate(InstanceAllocationRequest { module: Arc::new(module), + memfds: None, functions: &functions, image_base: (*func).as_ptr() as usize, imports: Imports::default(), diff --git a/src/lib.rs b/src/lib.rs index fb43affad1..b3cb8961f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,6 +100,8 @@ use std::collections::HashMap; use std::path::PathBuf; use structopt::StructOpt; use wasmtime::{Config, ProfilingStrategy}; +#[cfg(feature = "pooling-allocator")] +use wasmtime::{InstanceLimits, ModuleLimits, PoolingAllocationStrategy}; fn pick_profiling_strategy(jitdump: bool, vtune: bool) -> Result { Ok(match (jitdump, vtune) { @@ -250,6 +252,12 @@ struct CommonOptions { /// the data segments specified in the original wasm module. #[structopt(long)] paged_memory_initialization: bool, + + /// Enables the pooling allocator, in place of the on-demand + /// allocator. 
+ #[cfg(feature = "pooling-allocator")] + #[structopt(long)] + pooling_allocator: bool, } impl CommonOptions { @@ -325,6 +333,23 @@ impl CommonOptions { config.generate_address_map(!self.disable_address_map); config.paged_memory_initialization(self.paged_memory_initialization); + #[cfg(feature = "pooling-allocator")] + { + if self.pooling_allocator { + let mut module_limits = ModuleLimits::default(); + module_limits.functions = 50000; + module_limits.types = 10000; + module_limits.globals = 1000; + module_limits.memory_pages = 2048; + let instance_limits = InstanceLimits::default(); + config.allocation_strategy(wasmtime::InstanceAllocationStrategy::Pooling { + strategy: PoolingAllocationStrategy::NextAvailable, + module_limits, + instance_limits, + }); + } + } + Ok(config) }