diff --git a/crates/fuzzing/src/generators/pooling_config.rs b/crates/fuzzing/src/generators/pooling_config.rs
index f6a49a5e7b..f0cfef18bd 100644
--- a/crates/fuzzing/src/generators/pooling_config.rs
+++ b/crates/fuzzing/src/generators/pooling_config.rs
@@ -14,6 +14,9 @@ pub struct PoolingAllocationConfig {
     pub instance_table_elements: u32,
     pub instance_size: usize,
     pub async_stack_zeroing: bool,
+    pub async_stack_keep_resident: usize,
+    pub linear_memory_keep_resident: usize,
+    pub table_keep_resident: usize,
 }
 
 impl PoolingAllocationConfig {
@@ -28,7 +31,10 @@ impl PoolingAllocationConfig {
             .instance_memory_pages(self.instance_memory_pages)
             .instance_table_elements(self.instance_table_elements)
             .instance_size(self.instance_size)
-            .async_stack_zeroing(self.async_stack_zeroing);
+            .async_stack_zeroing(self.async_stack_zeroing)
+            .async_stack_keep_resident(self.async_stack_keep_resident)
+            .linear_memory_keep_resident(self.linear_memory_keep_resident)
+            .table_keep_resident(self.table_keep_resident);
         cfg
     }
 }
@@ -51,6 +57,9 @@ impl<'a> Arbitrary<'a> for PoolingAllocationConfig {
             instance_count: u.int_in_range(1..=MAX_COUNT)?,
             instance_size: u.int_in_range(0..=MAX_SIZE)?,
             async_stack_zeroing: u.arbitrary()?,
+            async_stack_keep_resident: u.int_in_range(0..=1 << 20)?,
+            linear_memory_keep_resident: u.int_in_range(0..=1 << 20)?,
+            table_keep_resident: u.int_in_range(0..=1 << 20)?,
         })
     }
 }
diff --git a/crates/runtime/src/cow.rs b/crates/runtime/src/cow.rs
index a4364e6f36..f40edc72e8 100644
--- a/crates/runtime/src/cow.rs
+++ b/crates/runtime/src/cow.rs
@@ -466,39 +466,23 @@ impl MemoryImageSlot {
         Ok(())
     }
 
+    /// Resets this linear memory slot back to a "pristine state".
+    ///
+    /// This will reset the memory back to its original contents on Linux or
+    /// reset the contents back to zero on other platforms. The `keep_resident`
+    /// argument is the maximum amount of memory to keep resident in this
+    /// process's memory on Linux. Up to that much memory will be `memset` to
+    /// zero, while the rest of it will be reset or released with `madvise`.
     #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
-    pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
+    pub(crate) fn clear_and_remain_ready(&mut self, keep_resident: usize) -> Result<()> {
         assert!(self.dirty);
-        cfg_if::cfg_if! {
-            if #[cfg(target_os = "linux")] {
-                // On Linux we can use `madvise` to reset the virtual memory
-                // back to its original state. This means back to all zeros for
-                // anonymous-backed pages and back to the original contents for
-                // CoW memory (the initial heap image). This has the precise
-                // semantics we want for reuse between instances, so it's all we
-                // need to do.
-                unsafe {
-                    rustix::mm::madvise(
-                        self.base as *mut c_void,
-                        self.cur_size,
-                        rustix::mm::Advice::LinuxDontNeed,
-                    )?;
-                }
-            } else {
-                // If we're not on Linux, however, then there's no generic
-                // platform way to reset memory back to its original state, so
-                // instead this is "feigned" by resetting memory back to
-                // entirely zeros with an anonymous backing.
-                //
-                // Additionally the previous image, if any, is dropped here
-                // since it's no longer applicable to this mapping.
-                self.reset_with_anon_memory()?;
-                self.image = None;
-            }
+        unsafe {
+            self.reset_all_memory_contents(keep_resident)?;
         }
 
-        // mprotect the initial heap region beyond the initial heap size back to PROT_NONE.
+        // mprotect the initial heap region beyond the initial heap size back
+        // to PROT_NONE.
         self.set_protection(
             self.initial_size..self.cur_size,
             rustix::mm::MprotectFlags::empty(),
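
The hunk above replaces the unconditional whole-slot `madvise` with a split reset: `memset` a hot prefix so those pages stay resident, then `madvise` the cold remainder back to the kernel. Below is a minimal standalone sketch of that split — illustrative only, not code from the patch — assuming Linux and the `libc` crate; `reset_region` is a made-up name (the real code lives in `MemoryImageSlot` and uses `rustix`):

```rust
use std::ptr;

// Hypothetical helper mirroring the new reset split: zero the first
// `keep_resident` bytes by hand, hand the rest back to the kernel.
unsafe fn reset_region(base: *mut u8, len: usize, keep_resident: usize) {
    // Zeroing the hot prefix manually keeps those pages resident, so the
    // next instance takes no page faults on them.
    let memset_len = keep_resident.min(len);
    ptr::write_bytes(base, 0, memset_len);

    // For anonymous memory MADV_DONTNEED leaves zero-fill-on-demand pages
    // behind; for private file-backed (CoW) mappings it restores the
    // original file contents on the next fault.
    let rest = len - memset_len;
    if rest != 0 {
        let rc = libc::madvise(base.add(memset_len).cast(), rest, libc::MADV_DONTNEED);
        assert_eq!(rc, 0);
    }
}

fn main() {
    unsafe {
        // An anonymous demo mapping standing in for a pooled memory slot.
        let len = 1 << 20;
        let ret = libc::mmap(
            ptr::null_mut(),
            len,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
            -1,
            0,
        );
        assert_ne!(ret, libc::MAP_FAILED);
        let base = ret as *mut u8;

        base.write_bytes(0xff, len); // dirty the whole slot
        reset_region(base, len, 64 << 10); // keep 64 KiB resident
        assert_eq!(*base, 0); // memset portion is zero
        assert_eq!(*base.add(len - 1), 0); // madvised portion reads back zero
        libc::munmap(ret, len);
    }
}
```
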
@@ -508,6 +492,136 @@ impl MemoryImageSlot {
         Ok(())
     }
 
+    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
+    unsafe fn reset_all_memory_contents(&mut self, keep_resident: usize) -> Result<()> {
+        if !cfg!(target_os = "linux") {
+            // If we're not on Linux then there's no generic platform way to
+            // reset memory back to its original state, so instead reset memory
+            // back to entirely zeros with an anonymous backing.
+            //
+            // Additionally the previous image, if any, is dropped here
+            // since it's no longer applicable to this mapping.
+            return self.reset_with_anon_memory();
+        }
+
+        match &self.image {
+            Some(image) => {
+                assert!(self.cur_size >= image.linear_memory_offset + image.len);
+                if image.linear_memory_offset < keep_resident {
+                    // If the image starts below the `keep_resident` threshold
+                    // then memory looks something like this:
+                    //
+                    //               up to `keep_resident` bytes
+                    //                          |
+                    //          +--------------------------+  remaining_memset
+                    //          |                          | /
+                    //          <--------------------------> <-------->
+                    //
+                    //                              image_end
+                    // 0        linear_memory_offset |               cur_size
+                    // |                |            |                  |
+                    // +----------------+------------+---------+--------+
+                    // |  dirty memory  |   image    |   dirty memory   |
+                    // +----------------+------------+---------+--------+
+                    //
+                    // <------+-------> <-----+----> <---+---> <---+--->
+                    //        |               |          |         |
+                    //        |               |          |         |
+                    //   memset (1)      madvise (2) memset (3) madvise (4)
+                    //
+                    // In this situation there are two disjoint regions that
+                    // are `memset` manually to zero. Note that `memset (3)`
+                    // may be zero bytes large. Furthermore `madvise (4)` may
+                    // also be zero bytes large.
+
+                    let image_end = image.linear_memory_offset + image.len;
+                    let mem_after_image = self.cur_size - image_end;
+                    let remaining_memset =
+                        (keep_resident - image.linear_memory_offset).min(mem_after_image);
+
+                    // This is memset (1)
+                    std::ptr::write_bytes(self.base as *mut u8, 0u8, image.linear_memory_offset);
+
+                    // This is madvise (2)
+                    self.madvise_reset(image.linear_memory_offset, image.len)?;
+
+                    // This is memset (3)
+                    std::ptr::write_bytes(
+                        (self.base + image_end) as *mut u8,
+                        0u8,
+                        remaining_memset,
+                    );
+
+                    // This is madvise (4)
+                    self.madvise_reset(
+                        image_end + remaining_memset,
+                        mem_after_image - remaining_memset,
+                    )?;
+                } else {
+                    // If the image starts after the `keep_resident` threshold
+                    // then we memset the start of linear memory and then use
+                    // madvise below for the rest of it, including the image.
+                    //
+                    // 0          keep_resident                     cur_size
+                    // |                |                              |
+                    // +----------------+---+----------+--------------+
+                    // |     dirty memory   |  image   | dirty memory |
+                    // +----------------+---+----------+--------------+
+                    //
+                    // <------+-------> <-------------+-------------->
+                    //        |                       |
+                    //        |                       |
+                    //   memset (1)              madvise (2)
+                    //
+                    // Here only a single memset is necessary since the image
+                    // started after the threshold which we're keeping resident.
+                    // Note that the memset may be zero bytes here.
 
+                    // This is memset (1)
+                    std::ptr::write_bytes(self.base as *mut u8, 0u8, keep_resident);
+
+                    // This is madvise (2)
+                    self.madvise_reset(keep_resident, self.cur_size - keep_resident)?;
+                }
+            }
+
+            // If there's no memory image for this slot then memset the first
+            // bytes in the memory back to zero while using `madvise` to purge
+            // the rest.
+            None => {
+                let size_to_memset = keep_resident.min(self.cur_size);
+                std::ptr::write_bytes(self.base as *mut u8, 0u8, size_to_memset);
+                self.madvise_reset(size_to_memset, self.cur_size - size_to_memset)?;
+            }
+        }
+
+        Ok(())
+    }
+
+    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
+    unsafe fn madvise_reset(&self, base: usize, len: usize) -> Result<()> {
+        assert!(base + len <= self.cur_size);
+        if len == 0 {
+            return Ok(());
+        }
+        cfg_if::cfg_if! {
+            if #[cfg(target_os = "linux")] {
+                rustix::mm::madvise(
+                    (self.base + base) as *mut c_void,
+                    len,
+                    rustix::mm::Advice::LinuxDontNeed,
+                )?;
+                Ok(())
+            } else {
+                unreachable!();
+            }
+        }
+    }
+
     fn set_protection(&self, range: Range<usize>, flags: rustix::mm::MprotectFlags) -> Result<()> {
         assert!(range.start <= range.end);
         assert!(range.end <= self.static_size);
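
The arithmetic behind the two diagrams above can be restated as plain range computations. `regions` below is a hypothetical helper (not in the patch) returning the `memset (1)`, `madvise (2)`, `memset (3)`, and `madvise (4)` byte ranges for the first branch, where the image starts below the `keep_resident` threshold:

```rust
use std::ops::Range;

fn regions(
    image_start: usize, // image.linear_memory_offset
    image_len: usize,
    cur_size: usize,
    keep_resident: usize,
) -> [Range<usize>; 4] {
    assert!(image_start < keep_resident);
    assert!(image_start + image_len <= cur_size);
    let image_end = image_start + image_len;
    let mem_after_image = cur_size - image_end;
    // The image itself is madvised rather than memset, so it doesn't consume
    // the `keep_resident` budget; only the dirty bytes in front of it do.
    let remaining_memset = (keep_resident - image_start).min(mem_after_image);
    [
        0..image_start,                          // memset (1)
        image_start..image_end,                  // madvise (2): restores image
        image_end..image_end + remaining_memset, // memset (3)
        image_end + remaining_memset..cur_size,  // madvise (4)
    ]
}

fn main() {
    // A 4 KiB image at offset 4 KiB in a 64 KiB slot, keeping 8 KiB resident.
    let [m1, a2, m3, a4] = regions(4096, 4096, 64 << 10, 8 << 10);
    assert_eq!(m1, 0..4096); // dirty bytes before the image
    assert_eq!(a2, 4096..8192); // the image itself
    assert_eq!(m3, 8192..12288); // 8 KiB budget minus the 4 KiB used by (1)
    assert_eq!(a4, 12288..65536); // everything else is released
}
```

Note that the four ranges always tile `0..cur_size`, so every byte of the slot is either zeroed in place or handed back to the kernel.
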
@@ -532,7 +646,7 @@ impl MemoryImageSlot {
 
     /// Map anonymous zeroed memory across the whole slot,
     /// inaccessible. Used both during instantiate and during drop.
-    fn reset_with_anon_memory(&self) -> Result<()> {
+    fn reset_with_anon_memory(&mut self) -> Result<()> {
         unsafe {
             let ptr = rustix::mm::mmap_anonymous(
                 self.base as *mut c_void,
@@ -542,6 +656,11 @@ impl MemoryImageSlot {
             )?;
             assert_eq!(ptr as usize, self.base);
         }
+
+        self.image = None;
+        self.cur_size = 0;
+        self.initial_size = 0;
+
         Ok(())
     }
 }
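
Both `madvise_reset` and the `reset_with_anon_memory` fallback above leave the slot "pristine"; the Linux path is cheaper because `MADV_DONTNEED` has exactly the right per-mapping-type semantics. A small Linux-only demo of the property the patch relies on — illustrative, assuming the `libc` crate, not code from the patch:

```rust
use std::ptr;

fn main() {
    unsafe {
        // A memfd holding 4 KiB of "image" contents, standing in for an
        // initial heap image.
        let fd = libc::memfd_create(b"image\0".as_ptr().cast(), 0);
        assert!(fd >= 0);
        let page: usize = 4096;
        assert_eq!(libc::ftruncate(fd, page as libc::off_t), 0);
        let data = [1u8, 2, 3, 4];
        assert_eq!(
            libc::pwrite(fd, data.as_ptr().cast(), data.len(), 0),
            data.len() as isize
        );

        // Map it privately (copy-on-write), as the pooling allocator does.
        let ret = libc::mmap(
            ptr::null_mut(),
            page,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE,
            fd,
            0,
        );
        assert_ne!(ret, libc::MAP_FAILED);
        let base = ret as *mut u8;

        // Dirty the CoW page, as a guest instance would.
        *base = 5;
        assert_eq!(*base, 5);

        // "Reset" the slot: the private dirty copy is discarded...
        assert_eq!(libc::madvise(ret, page, libc::MADV_DONTNEED), 0);

        // ...and the original image contents are visible again, with no
        // explicit re-mapping or copying.
        assert_eq!(std::slice::from_raw_parts(base, 4), &[1, 2, 3, 4]);

        libc::munmap(ret, page);
        libc::close(fd);
    }
}
```
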
@@ -638,7 +757,7 @@ mod test {
         assert_eq!(0, slice[131071]);
         // instantiate again; we should see zeroes, even as the
         // reuse-anon-mmap-opt kicks in
-        memfd.clear_and_remain_ready().unwrap();
+        memfd.clear_and_remain_ready(0).unwrap();
         assert!(!memfd.is_dirty());
         memfd.instantiate(64 << 10, None).unwrap();
         let slice = mmap.as_slice();
@@ -661,33 +780,69 @@ mod test {
         assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
         slice[4096] = 5;
         // Clear and re-instantiate same image
-        memfd.clear_and_remain_ready().unwrap();
+        memfd.clear_and_remain_ready(0).unwrap();
         memfd.instantiate(64 << 10, Some(&image)).unwrap();
         let slice = mmap.as_slice();
         // Should not see mutation from above
         assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
         // Clear and re-instantiate no image
-        memfd.clear_and_remain_ready().unwrap();
+        memfd.clear_and_remain_ready(0).unwrap();
         memfd.instantiate(64 << 10, None).unwrap();
         assert!(!memfd.has_image());
         let slice = mmap.as_slice();
         assert_eq!(&[0, 0, 0, 0], &slice[4096..4100]);
         // Clear and re-instantiate image again
-        memfd.clear_and_remain_ready().unwrap();
+        memfd.clear_and_remain_ready(0).unwrap();
         memfd.instantiate(64 << 10, Some(&image)).unwrap();
         let slice = mmap.as_slice();
         assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
         // Create another image with different data.
         let image2 = Arc::new(create_memfd_with_data(4096, &[10, 11, 12, 13]).unwrap());
-        memfd.clear_and_remain_ready().unwrap();
+        memfd.clear_and_remain_ready(0).unwrap();
         memfd.instantiate(128 << 10, Some(&image2)).unwrap();
         let slice = mmap.as_slice();
         assert_eq!(&[10, 11, 12, 13], &slice[4096..4100]);
         // Instantiate the original image again; we should notice it's
         // a different image and not reuse the mappings.
-        memfd.clear_and_remain_ready().unwrap();
+        memfd.clear_and_remain_ready(0).unwrap();
         memfd.instantiate(64 << 10, Some(&image)).unwrap();
         let slice = mmap.as_slice();
         assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
     }
+
+    #[test]
+    #[cfg(target_os = "linux")]
+    fn memset_instead_of_madvise() {
+        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
+        let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
+        memfd.no_clear_on_drop();
+
+        // Test basics with the image
+        for image_off in [0, 4096, 8 << 10] {
+            let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
+            for amt_to_memset in [0, 4096, 10 << 12, 1 << 20, 10 << 20] {
+                memfd.instantiate(64 << 10, Some(&image)).unwrap();
+                assert!(memfd.has_image());
+                let slice = mmap.as_mut_slice();
+                if image_off > 0 {
+                    assert_eq!(slice[image_off - 1], 0);
+                }
+                assert_eq!(slice[image_off + 5], 0);
+                assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
+                slice[image_off] = 5;
+                assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
+                memfd.clear_and_remain_ready(amt_to_memset).unwrap();
+            }
+        }
+
+        // Test without an image
+        for amt_to_memset in [0, 4096, 10 << 12, 1 << 20, 10 << 20] {
+            memfd.instantiate(64 << 10, None).unwrap();
+            for chunk in mmap.as_mut_slice()[..64 << 10].chunks_mut(1024) {
+                assert_eq!(chunk[0], 0);
+                chunk[0] = 5;
+            }
+            memfd.clear_and_remain_ready(amt_to_memset).unwrap();
+        }
+    }
 }
diff --git a/crates/runtime/src/cow_disabled.rs b/crates/runtime/src/cow_disabled.rs
index be06a9f4a1..63a92bd0ce 100644
--- a/crates/runtime/src/cow_disabled.rs
+++ b/crates/runtime/src/cow_disabled.rs
@@ -57,7 +57,7 @@ impl MemoryImageSlot {
         unreachable!();
     }
 
-    pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
+    pub(crate) fn clear_and_remain_ready(&mut self, _keep_resident: usize) -> Result<()> {
         unreachable!();
     }
diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs
index 424708d5b0..d938cd295f 100644
--- a/crates/runtime/src/instance/allocator/pooling.rs
+++ b/crates/runtime/src/instance/allocator/pooling.rs
@@ -126,6 +126,8 @@ struct InstancePool {
     index_allocator: Mutex<PoolingAllocationState>,
     memories: MemoryPool,
     tables: TablePool,
+    linear_memory_keep_resident: usize,
+    table_keep_resident: usize,
 }
 
 impl InstancePool {
@@ -156,6 +158,8 @@ impl InstancePool {
             )),
             memories: MemoryPool::new(&config.limits, tunables)?,
             tables: TablePool::new(&config.limits)?,
+            linear_memory_keep_resident: config.linear_memory_keep_resident,
+            table_keep_resident: config.table_keep_resident,
         };
 
         Ok(pool)
     }
@@ -373,7 +377,10 @@ impl InstancePool {
                 // image, just drop it here, and let the drop handler for the
                 // slot unmap in a way that retains the address space
                 // reservation.
-            if image.clear_and_remain_ready().is_ok() {
+            if image
+                .clear_and_remain_ready(self.linear_memory_keep_resident)
+                .is_ok()
+            {
                 self.memories
                     .return_memory_image_slot(instance_index, def_mem_idx, image);
             }
@@ -437,10 +444,20 @@ impl InstancePool {
             );
 
             drop(table);
-            decommit_table_pages(base, size).expect("failed to decommit table pages");
+            self.reset_table_pages_to_zero(base, size)
+                .expect("failed to decommit table pages");
         }
     }
 
+    fn reset_table_pages_to_zero(&self, base: *mut u8, size: usize) -> Result<()> {
+        let size_to_memset = size.min(self.table_keep_resident);
+        unsafe {
+            std::ptr::write_bytes(base, 0, size_to_memset);
+            decommit_table_pages(base.add(size_to_memset), size - size_to_memset)?;
+        }
+        Ok(())
+    }
+
     fn validate_table_plans(&self, module: &Module) -> Result<()> {
         let tables = module.table_plans.len() - module.num_imported_tables;
         if tables > self.tables.max_tables {
@@ -807,6 +824,7 @@ struct StackPool {
     page_size: usize,
     index_allocator: Mutex<PoolingAllocationState>,
     async_stack_zeroing: bool,
+    async_stack_keep_resident: usize,
 }
 
 #[cfg(all(feature = "async", unix))]
 impl StackPool {
@@ -852,6 +870,7 @@ impl StackPool {
             max_instances,
             page_size,
             async_stack_zeroing: config.async_stack_zeroing,
+            async_stack_keep_resident: config.async_stack_keep_resident,
             // We always use a `NextAvailable` strategy for stack
             // allocation. We don't want or need an affinity policy
             // here: stacks do not benefit from being allocated to the
@@ -919,11 +938,32 @@ impl StackPool {
         assert!(index < self.max_instances);
 
         if self.async_stack_zeroing {
-            reset_stack_pages_to_zero(bottom_of_stack as _, stack_size).unwrap();
+            self.zero_stack(bottom_of_stack, stack_size);
         }
 
         self.index_allocator.lock().unwrap().free(SlotId(index));
     }
+
+    fn zero_stack(&self, bottom: usize, size: usize) {
+        // Manually zero the top of the stack to keep the pages resident in
+        // memory and avoid future page faults. Use the system to deallocate
+        // pages past this. This hopefully strikes a reasonable balance between:
+        //
+        // * memset for the whole range is probably expensive
+        // * madvise for the whole range incurs expensive future page faults
+        // * most threads probably don't use most of the stack anyway
+        let size_to_memset = size.min(self.async_stack_keep_resident);
+        unsafe {
+            std::ptr::write_bytes(
+                (bottom + size - size_to_memset) as *mut u8,
+                0,
+                size_to_memset,
+            );
+        }
+
+        // Use the system to reset remaining stack pages to zero.
+        reset_stack_pages_to_zero(bottom as _, size - size_to_memset).unwrap();
+    }
 }
 
 /// Configuration options for the pooling instance allocator supplied at
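
For stacks the split is inverted relative to linear memories: stacks grow downward, so the recently used (hot) pages sit at the highest addresses of the allocation, and that is the end `zero_stack` above memsets. A sketch of the range arithmetic (`stack_ranges` is a hypothetical name, not part of the patch):

```rust
use std::ops::Range;

// Returns (memset range, decommit range) for a stack allocation, mirroring
// the arithmetic in `zero_stack`.
fn stack_ranges(bottom: usize, size: usize, keep_resident: usize) -> (Range<usize>, Range<usize>) {
    let size_to_memset = size.min(keep_resident);
    let memset = (bottom + size - size_to_memset)..(bottom + size); // hot top of stack
    let decommit = bottom..(bottom + size - size_to_memset); // cold deep-stack pages
    (memset, decommit)
}

fn main() {
    // A 2 MiB stack at a made-up base address, keeping 64 KiB resident.
    let (memset, decommit) = stack_ranges(0x7000_0000, 2 << 20, 64 << 10);
    assert_eq!(memset, 0x701F_0000..0x7020_0000); // top 64 KiB, zeroed in place
    assert_eq!(decommit, 0x7000_0000..0x701F_0000); // handed back to the kernel
}
```
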
@@ -940,6 +980,22 @@ pub struct PoolingInstanceAllocatorConfig {
     pub limits: InstanceLimits,
     /// Whether or not async stacks are zeroed after use.
     pub async_stack_zeroing: bool,
+    /// If async stack zeroing is enabled and the host platform is Linux,
+    /// this is how much memory to zero out with `memset`.
+    ///
+    /// The rest of memory will be zeroed out with `madvise`.
+    pub async_stack_keep_resident: usize,
+    /// How much linear memory, in bytes, to keep resident after resetting for
+    /// use with the next instance. This much memory will be `memset` to zero
+    /// when a linear memory is deallocated.
+    ///
+    /// Memory exceeding this amount in the wasm linear memory will be released
+    /// with `madvise` back to the kernel.
+    ///
+    /// Only applicable on Linux.
+    pub linear_memory_keep_resident: usize,
+    /// Same as `linear_memory_keep_resident` but for tables.
+    pub table_keep_resident: usize,
 }
 
 impl Default for PoolingInstanceAllocatorConfig {
@@ -949,6 +1005,9 @@ impl Default for PoolingInstanceAllocatorConfig {
             stack_size: 2 << 20,
             limits: InstanceLimits::default(),
             async_stack_zeroing: false,
+            async_stack_keep_resident: 0,
+            linear_memory_keep_resident: 0,
+            table_keep_resident: 0,
         }
     }
 }
diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs
index 836f942c96..7b51361c60 100644
--- a/crates/wasmtime/src/config.rs
+++ b/crates/wasmtime/src/config.rs
@@ -1664,14 +1664,11 @@ pub enum WasmBacktraceDetails {
     Environment,
 }
 
-/// Global configuration options used to create an [`Engine`](crate::Engine)
-/// and customize its behavior.
+/// Configuration options used with [`InstanceAllocationStrategy::Pooling`] to
+/// change the behavior of the pooling instance allocator.
 ///
-/// This structure exposed a builder-like interface and is primarily consumed by
-/// [`Engine::new()`](crate::Engine::new).
-///
-/// The validation of `Config` is deferred until the engine is being built, thus
-/// a problematic config may cause `Engine::new` to fail.
+/// This structure has a builder-style API in the same manner as [`Config`] and
+/// is configured with [`Config::allocation_strategy`].
 #[cfg(feature = "pooling-allocator")]
 #[derive(Debug, Clone, Default)]
 pub struct PoolingAllocationConfig {
@@ -1703,11 +1700,8 @@ impl PoolingAllocationConfig {
     /// Wasmtime and the [`call_async`] variant
     /// of calling WebAssembly is used then Wasmtime will create a separate
     /// runtime execution stack for each future produced by [`call_async`].
-    /// When using the pooling instance allocator
-    /// ([`InstanceAllocationStrategy::Pooling`]) this allocation will happen
-    /// from a pool of stacks and additionally deallocation will simply release
-    /// the stack back to the pool. During the deallocation process Wasmtime
-    /// won't by default reset the contents of the stack back to zero.
+    /// During the deallocation process Wasmtime won't by default reset the
+    /// contents of the stack back to zero.
 
     /// When this option is enabled it can be seen as a defense-in-depth
     /// mechanism to reset a stack back to zero. This is not required for
@@ -1725,6 +1719,57 @@ impl PoolingAllocationConfig {
         self
     }
 
+    /// How much memory, in bytes, to keep resident for async stacks allocated
+    /// with the pooling allocator.
+    ///
+    /// When [`PoolingAllocationConfig::async_stack_zeroing`] is enabled then
+    /// Wasmtime will reset the contents of async stacks back to zero upon
+    /// deallocation. This option can be used to perform the zeroing operation
+    /// with `memset` up to a certain threshold of bytes instead of using
+    /// system calls to reset the stack to zero.
+    ///
+    /// Note that stack memory zeroed this way is kept resident rather than
+    /// being decommitted back to the system.
+    #[cfg(feature = "async")]
+    #[cfg_attr(nightlydoc, doc(cfg(feature = "async")))]
+    pub fn async_stack_keep_resident(&mut self, size: usize) -> &mut Self {
+        let size = round_up_to_pages(size as u64) as usize;
+        self.config.async_stack_keep_resident = size;
+        self
+    }
+
+    /// How much memory, in bytes, to keep resident for each linear memory
+    /// after deallocation.
+    ///
+    /// This option is only applicable on Linux and has no effect on other
+    /// platforms.
+    ///
+    /// By default Wasmtime will use `madvise` to reset the entire contents of
+    /// linear memory back to zero when a linear memory is deallocated.
+    /// This option can be used to instead `memset` part of that memory back
+    /// to zero, which can, in some configurations, reduce the number of page
+    /// faults taken when a slot is reused.
+    pub fn linear_memory_keep_resident(&mut self, size: usize) -> &mut Self {
+        let size = round_up_to_pages(size as u64) as usize;
+        self.config.linear_memory_keep_resident = size;
+        self
+    }
+
+    /// How much memory, in bytes, to keep resident for each table after
+    /// deallocation.
+    ///
+    /// This option is only applicable on Linux and has no effect on other
+    /// platforms.
+    ///
+    /// This option is the same as
+    /// [`PoolingAllocationConfig::linear_memory_keep_resident`] except that it
+    /// is applicable to tables instead.
+    pub fn table_keep_resident(&mut self, size: usize) -> &mut Self {
+        let size = round_up_to_pages(size as u64) as usize;
+        self.config.table_keep_resident = size;
+        self
+    }
+
     /// The maximum number of concurrent instances supported (default is 1000).
     ///
     /// This value has a direct impact on the amount of memory allocated by the pooling
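
For reference, this is roughly how an embedder would opt into the new knobs. Illustrative only: it assumes Wasmtime is built with the `pooling-allocator` and `async` features and that `InstanceAllocationStrategy::Pooling` accepts a `PoolingAllocationConfig`, as in this version of the API. The sizes are arbitrary and are rounded up to the host page size by the builder methods:

```rust
use wasmtime::{Config, Engine, InstanceAllocationStrategy, PoolingAllocationConfig};

fn main() {
    let mut pool = PoolingAllocationConfig::default();
    pool.async_stack_zeroing(true);
    pool.async_stack_keep_resident(64 << 10); // memset the top 64 KiB of each stack
    pool.linear_memory_keep_resident(1 << 20); // memset the first 1 MiB of each memory
    pool.table_keep_resident(64 << 10); // memset the first 64 KiB of each table

    let mut config = Config::new();
    config.async_support(true);
    config.allocation_strategy(InstanceAllocationStrategy::Pooling(pool));
    let _engine = Engine::new(&config).unwrap();
}
```
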