diff --git a/crates/fuzzing/src/generators/pooling_config.rs b/crates/fuzzing/src/generators/pooling_config.rs index f0cfef18bd..f670746dce 100644 --- a/crates/fuzzing/src/generators/pooling_config.rs +++ b/crates/fuzzing/src/generators/pooling_config.rs @@ -6,7 +6,7 @@ use arbitrary::{Arbitrary, Unstructured}; #[derive(Debug, Clone, Eq, PartialEq, Hash)] #[allow(missing_docs)] pub struct PoolingAllocationConfig { - pub strategy: PoolingAllocationStrategy, + pub max_unused_warm_slots: u32, pub instance_count: u32, pub instance_memories: u32, pub instance_tables: u32, @@ -24,7 +24,7 @@ impl PoolingAllocationConfig { pub fn to_wasmtime(&self) -> wasmtime::PoolingAllocationConfig { let mut cfg = wasmtime::PoolingAllocationConfig::default(); - cfg.strategy(self.strategy.to_wasmtime()) + cfg.max_unused_warm_slots(self.max_unused_warm_slots) .instance_count(self.instance_count) .instance_memories(self.instance_memories) .instance_tables(self.instance_tables) @@ -48,13 +48,15 @@ impl<'a> Arbitrary<'a> for PoolingAllocationConfig { const MAX_MEMORY_PAGES: u64 = 160; // 10 MiB const MAX_SIZE: usize = 1 << 20; // 1 MiB + let instance_count = u.int_in_range(1..=MAX_COUNT)?; + Ok(Self { - strategy: u.arbitrary()?, + max_unused_warm_slots: u.int_in_range(0..=instance_count + 10)?, instance_tables: u.int_in_range(0..=MAX_TABLES)?, instance_memories: u.int_in_range(0..=MAX_MEMORIES)?, instance_table_elements: u.int_in_range(0..=MAX_ELEMENTS)?, instance_memory_pages: u.int_in_range(0..=MAX_MEMORY_PAGES)?, - instance_count: u.int_in_range(1..=MAX_COUNT)?, + instance_count, instance_size: u.int_in_range(0..=MAX_SIZE)?, async_stack_zeroing: u.arbitrary()?, async_stack_keep_resident: u.int_in_range(0..=1 << 20)?, @@ -63,28 +65,3 @@ impl<'a> Arbitrary<'a> for PoolingAllocationConfig { }) } } - -/// Configuration for `wasmtime::PoolingAllocationStrategy`. -#[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] -pub enum PoolingAllocationStrategy { - /// Use next available instance slot. - NextAvailable, - /// Use random instance slot. - Random, - /// Use an affinity-based strategy. - ReuseAffinity, -} - -impl PoolingAllocationStrategy { - fn to_wasmtime(&self) -> wasmtime::PoolingAllocationStrategy { - match self { - PoolingAllocationStrategy::NextAvailable => { - wasmtime::PoolingAllocationStrategy::NextAvailable - } - PoolingAllocationStrategy::Random => wasmtime::PoolingAllocationStrategy::Random, - PoolingAllocationStrategy::ReuseAffinity => { - wasmtime::PoolingAllocationStrategy::ReuseAffinity - } - } - } -} diff --git a/crates/runtime/src/cow.rs b/crates/runtime/src/cow.rs index 8f9ded83f0..12f8093271 100644 --- a/crates/runtime/src/cow.rs +++ b/crates/runtime/src/cow.rs @@ -305,7 +305,7 @@ impl ModuleMemoryImages { /// middle of it. Pictorially this data structure manages a virtual memory /// region that looks like: /// -/// ```ignore +/// ```text /// +--------------------+-------------------+--------------+--------------+ /// | anonymous | optional | anonymous | PROT_NONE | /// | zero | memory | zero | memory | @@ -333,7 +333,7 @@ impl ModuleMemoryImages { /// `accessible` limits are. Initially there is assumed to be no image in linear /// memory. /// -/// When [`MemoryImageSlot::instantiate`] is called then the method will perform +/// When `MemoryImageSlot::instantiate` is called then the method will perform /// a "synchronization" to take the image from its prior state to the new state /// for the image specified. The first instantiation for example will mmap the /// heap image into place. 
Upon reuse of a slot nothing happens except possibly @@ -343,7 +343,7 @@ impl ModuleMemoryImages { /// A `MemoryImageSlot` is either `dirty` or it isn't. When a `MemoryImageSlot` /// is dirty then it is assumed that any memory beneath `self.accessible` could /// have any value. Instantiation cannot happen into a `dirty` slot, however, so -/// the [`MemoryImageSlot::clear_and_remain_ready`] returns this memory back to +/// the `MemoryImageSlot::clear_and_remain_ready` returns this memory back to /// its original state to mark `dirty = false`. This is done by resetting all /// anonymous memory back to zero and the image itself back to its initial /// contents. diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs index e8f8e75f0d..f58dd3810e 100644 --- a/crates/runtime/src/instance/allocator.rs +++ b/crates/runtime/src/instance/allocator.rs @@ -19,10 +19,7 @@ use wasmtime_environ::{ mod pooling; #[cfg(feature = "pooling-allocator")] -pub use self::pooling::{ - InstanceLimits, PoolingAllocationStrategy, PoolingInstanceAllocator, - PoolingInstanceAllocatorConfig, -}; +pub use self::pooling::{InstanceLimits, PoolingInstanceAllocator, PoolingInstanceAllocatorConfig}; /// Represents a request for a new runtime instance. pub struct InstanceAllocationRequest<'a> { diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index efd5b57fc5..b1dd75f58f 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -83,25 +83,6 @@ impl Default for InstanceLimits { } } -/// The allocation strategy to use for the pooling instance allocator. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum PoolingAllocationStrategy { - /// Allocate from the next available instance. - NextAvailable, - /// Allocate from a random available instance. - Random, - /// Try to allocate an instance slot that was previously used for - /// the same module, potentially enabling faster instantiation by - /// reusing e.g. memory mappings. - ReuseAffinity, -} - -impl Default for PoolingAllocationStrategy { - fn default() -> Self { - Self::ReuseAffinity - } -} - /// Represents a pool of maximal `Instance` structures. /// /// Each index in the pool provides enough space for a maximal `Instance` @@ -142,7 +123,7 @@ impl InstancePool { mapping, instance_size, max_instances, - index_allocator: IndexAllocator::new(config.strategy, max_instances), + index_allocator: IndexAllocator::new(config.limits.count, config.max_unused_warm_slots), memories: MemoryPool::new(&config.limits, tunables)?, tables: TablePool::new(&config.limits)?, linear_memory_keep_resident: config.linear_memory_keep_resident, @@ -248,7 +229,7 @@ impl InstancePool { // touched again until we write a fresh Instance in-place with // std::ptr::write in allocate() above. - self.index_allocator.free(SlotId(index)); + self.index_allocator.free(SlotId(index as u32)); } fn allocate_instance_resources( @@ -546,7 +527,7 @@ impl InstancePool { // any sort of infinite loop since this should be the final operation // working with `module`. 
while let Some(index) = self.index_allocator.alloc_affine_and_clear_affinity(module) { - self.memories.clear_images(index.0); + self.memories.clear_images(index.index()); self.index_allocator.free(index); } } @@ -892,15 +873,10 @@ impl StackPool { page_size, async_stack_zeroing: config.async_stack_zeroing, async_stack_keep_resident: config.async_stack_keep_resident, - // We always use a `NextAvailable` strategy for stack - // allocation. We don't want or need an affinity policy - // here: stacks do not benefit from being allocated to the - // same compiled module with the same image (they always - // start zeroed just the same for everyone). - index_allocator: IndexAllocator::new( - PoolingAllocationStrategy::NextAvailable, - max_instances, - ), + // Note that `max_unused_warm_slots` is set to zero since stacks + // have no affinity so there's no need to keep intentionally unused + // warm slots around. + index_allocator: IndexAllocator::new(config.limits.count, 0), }) } @@ -965,7 +941,7 @@ impl StackPool { self.zero_stack(bottom_of_stack, stack_size); } - self.index_allocator.free(SlotId(index)); + self.index_allocator.free(SlotId(index as u32)); } fn zero_stack(&self, bottom: usize, size: usize) { @@ -994,9 +970,8 @@ impl StackPool { /// construction. #[derive(Copy, Clone, Debug)] pub struct PoolingInstanceAllocatorConfig { - /// Allocation strategy to use for slot indexes in the pooling instance - /// allocator. - pub strategy: PoolingAllocationStrategy, + /// See `PoolingAllocatorConfig::max_unused_warm_slots` in `wasmtime` + pub max_unused_warm_slots: u32, /// The size, in bytes, of async stacks to allocate (not including the guard /// page). pub stack_size: usize, @@ -1025,7 +1000,7 @@ pub struct PoolingInstanceAllocatorConfig { impl Default for PoolingInstanceAllocatorConfig { fn default() -> PoolingInstanceAllocatorConfig { PoolingInstanceAllocatorConfig { - strategy: Default::default(), + max_unused_warm_slots: 100, stack_size: 2 << 20, limits: InstanceLimits::default(), async_stack_zeroing: false, @@ -1177,7 +1152,7 @@ mod test { #[test] fn test_instance_pool() -> Result<()> { let mut config = PoolingInstanceAllocatorConfig::default(); - config.strategy = PoolingAllocationStrategy::NextAvailable; + config.max_unused_warm_slots = 0; config.limits = InstanceLimits { count: 3, tables: 1, @@ -1199,10 +1174,7 @@ mod test { assert_eq!(instances.instance_size, 1008); // round 1000 up to alignment assert_eq!(instances.max_instances, 3); - assert_eq!( - instances.index_allocator.testing_freelist(), - [SlotId(0), SlotId(1), SlotId(2)] - ); + assert_eq!(instances.index_allocator.testing_freelist(), []); let mut handles = Vec::new(); let module = Arc::new(Module::default()); @@ -1248,7 +1220,7 @@ mod test { assert_eq!( instances.index_allocator.testing_freelist(), - [SlotId(2), SlotId(1), SlotId(0)] + [SlotId(0), SlotId(1), SlotId(2)] ); Ok(()) @@ -1353,26 +1325,12 @@ mod test { assert_eq!(pool.max_instances, 10); assert_eq!(pool.page_size, native_page_size); - assert_eq!( - pool.index_allocator.testing_freelist(), - [ - SlotId(0), - SlotId(1), - SlotId(2), - SlotId(3), - SlotId(4), - SlotId(5), - SlotId(6), - SlotId(7), - SlotId(8), - SlotId(9) - ], - ); + assert_eq!(pool.index_allocator.testing_freelist(), []); let base = pool.mapping.as_ptr() as usize; let mut stacks = Vec::new(); - for i in (0..10).rev() { + for i in 0..10 { let stack = pool.allocate().expect("allocation should succeed"); assert_eq!( ((stack.top().unwrap() as usize - base) / pool.stack_size) - 1, @@ -1392,16 +1350,16 @@ 
mod test { assert_eq!( pool.index_allocator.testing_freelist(), [ - SlotId(9), - SlotId(8), - SlotId(7), - SlotId(6), - SlotId(5), - SlotId(4), - SlotId(3), - SlotId(2), + SlotId(0), SlotId(1), - SlotId(0) + SlotId(2), + SlotId(3), + SlotId(4), + SlotId(5), + SlotId(6), + SlotId(7), + SlotId(8), + SlotId(9) ], ); @@ -1475,7 +1433,7 @@ mod test { #[test] fn test_stack_zeroed() -> Result<()> { let config = PoolingInstanceAllocatorConfig { - strategy: PoolingAllocationStrategy::NextAvailable, + max_unused_warm_slots: 0, limits: InstanceLimits { count: 1, table_elements: 0, @@ -1511,7 +1469,7 @@ mod test { #[test] fn test_stack_unzeroed() -> Result<()> { let config = PoolingInstanceAllocatorConfig { - strategy: PoolingAllocationStrategy::NextAvailable, + max_unused_warm_slots: 0, limits: InstanceLimits { count: 1, table_elements: 0, diff --git a/crates/runtime/src/instance/allocator/pooling/index_allocator.rs b/crates/runtime/src/instance/allocator/pooling/index_allocator.rs index 09c46839bd..b68e294560 100644 --- a/crates/runtime/src/instance/allocator/pooling/index_allocator.rs +++ b/crates/runtime/src/instance/allocator/pooling/index_allocator.rs @@ -1,40 +1,18 @@ //! Index/slot allocator policies for the pooling allocator. -use super::PoolingAllocationStrategy; use crate::CompiledModuleId; -use rand::rngs::SmallRng; -use rand::{Rng, SeedableRng}; -use std::collections::HashMap; +use std::collections::hash_map::{Entry, HashMap}; +use std::mem; use std::sync::Mutex; /// A slot index. The job of this allocator is to hand out these /// indices. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct SlotId(pub usize); +#[derive(Hash, Clone, Copy, Debug, PartialEq, Eq)] +pub struct SlotId(pub u32); impl SlotId { /// The index of this slot. pub fn index(self) -> usize { - self.0 - } -} - -/// An index in the global freelist. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct GlobalFreeListIndex(usize); -impl GlobalFreeListIndex { - /// The index of this slot. - fn index(self) -> usize { - self.0 - } -} - -/// An index in a per-module freelist. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct PerModuleFreeListIndex(usize); -impl PerModuleFreeListIndex { - /// The index of this slot. - fn index(self) -> usize { - self.0 + self.0 as usize } } @@ -43,153 +21,93 @@ pub struct IndexAllocator(Mutex<Inner>); #[derive(Debug)] struct Inner { - strategy: PoolingAllocationStrategy, - rng: SmallRng, - - /// Free-list of all slots. + /// Maximum number of "unused warm slots" which will be allowed during + /// allocation. /// - /// We use this to pick a victim when we don't have an appropriate slot with - /// the preferred affinity. - free_list: Vec<SlotId>, + /// This is a user-configurable knob which can be used to influence the + /// maximum number of unused slots at any one point in time. A "warm slot" + /// is one that's considered to have been previously allocated. + max_unused_warm_slots: u32, - /// Affine slot management which tracks which slots are free and were last - /// used with the specified `CompiledModuleId`. + /// Current count of "warm slots", or those that were previously allocated + /// but are now no longer in use. /// - /// Invariant: any module ID in this hashmap must have a non-empty list of - /// free slots (otherwise we remove it). We remove a module's freelist when - /// we have no more slots with affinity for that module. - per_module: HashMap<CompiledModuleId, Vec<SlotId>>, + /// This is the size of the `warm` list.
+ unused_warm_slots: u32, + + /// A linked list (via indices) which enumerates all "warm and unused" + /// slots, or those which have previously been allocated and then free'd. + warm: List, + + /// Last slot that was allocated for the first time ever. + /// + /// This is initially 0 and is incremented during `pick_cold`. If this + /// matches `max_cold`, there are no more cold slots left. + last_cold: u32, /// The state of any given slot. /// /// Records indices in the above list (empty) or two lists (with affinity), /// and these indices are kept up-to-date to allow fast removal. slot_state: Vec<SlotState>, + + /// Affine slot management which tracks which slots are free and were last + /// used with the specified `CompiledModuleId`. + /// + /// The `List` here is appended to during deallocation and removal happens + /// from the tail during allocation. + module_affine: HashMap<CompiledModuleId, List>, +} + +/// A helper "linked list" data structure which is based on indices. +#[derive(Default, Debug)] +struct List { + head: Option<SlotId>, + tail: Option<SlotId>, +} + +/// A helper data structure for an intrusive linked list, coupled with the +/// `List` type. +#[derive(Default, Debug, Copy, Clone)] +struct Link { + prev: Option<SlotId>, + next: Option<SlotId>, } #[derive(Clone, Debug)] -pub(crate) enum SlotState { - /// Currently allocated. +enum SlotState { + /// This slot is currently in use and is affine to the specified module. + Used(Option<CompiledModuleId>), + + /// This slot is not currently used, and has never been used. + UnusedCold, + + /// This slot is not currently used, but was previously allocated. /// - /// Invariant: no slot in this state has its index in either - /// `free_list` or any list in `per_module`. - Taken(Option<CompiledModuleId>), - /// Currently free. A free slot is able to be allocated for any - /// request, but may have affinity to a certain module that we - /// prefer to use it for. - /// - /// Invariant: every slot in this state has its index in at least - /// `free_list`, and possibly a `per_module` free-list; see - /// FreeSlotState. - Free(FreeSlotState), + /// The payload here is metadata about the lists that this slot is contained + /// within. + UnusedWarm(Unused), } impl SlotState { - fn unwrap_free(&self) -> &FreeSlotState { + fn unwrap_unused(&mut self) -> &mut Unused { match self { - &Self::Free(ref free) => free, - _ => panic!("Slot not free"), - } - } - - fn unwrap_free_mut(&mut self) -> &mut FreeSlotState { - match self { - &mut Self::Free(ref mut free) => free, - _ => panic!("Slot not free"), - } - } - - fn unwrap_module_id(&self) -> Option<CompiledModuleId> { - match self { - &Self::Taken(module_id) => module_id, - _ => panic!("Slot not in Taken state"), + SlotState::UnusedWarm(u) => u, + _ => unreachable!(), } } } -#[derive(Clone, Debug)] -pub(crate) enum FreeSlotState { - /// The slot is free, and has no affinity. - /// - /// Invariant: every slot in this state has its index in - /// `free_list`. No slot in this state has its index in any other - /// (per-module) free-list. - NoAffinity { - /// Index in the global free list. - /// - /// Invariant: free_list[slot_state[i].free_list_index] == i. - free_list_index: GlobalFreeListIndex, - }, - /// The slot is free, and has an affinity for some module. This - /// means we prefer to choose this slot (or some other one with - /// the same affinity) given a request to allocate a slot for this - /// module. It can, however, still be used for any other module if - /// needed. - /// - /// Invariant: every slot in this state has its index in both - /// `free_list` *and* exactly one list in `per_module`.
- Affinity { - module: CompiledModuleId, - /// Index in the global free list. - /// - /// Invariant: free_list[slot_state[i].free_list_index] == i. - free_list_index: GlobalFreeListIndex, - /// Index in a per-module free list. - /// - /// Invariant: per_module[slot_state[i].module][slot_state[i].per_module_index] - /// == i. - per_module_index: PerModuleFreeListIndex, - }, -} +#[derive(Default, Copy, Clone, Debug)] +struct Unused { + /// Which module this slot was historically affine to, if any. + affinity: Option<CompiledModuleId>, -impl FreeSlotState { - /// Get the index of this slot in the global free list. - fn free_list_index(&self) -> GlobalFreeListIndex { - match self { - &Self::NoAffinity { free_list_index } - | &Self::Affinity { - free_list_index, .. - } => free_list_index, - } - } + /// Metadata about the linked list for all slots affine to `affinity`. + affine_list_link: Link, - /// Update the index of this slot in the global free list. - fn update_free_list_index(&mut self, index: GlobalFreeListIndex) { - match self { - &mut Self::NoAffinity { - ref mut free_list_index, - } - | &mut Self::Affinity { - ref mut free_list_index, - .. - } => { - *free_list_index = index; - } - } - } - - /// Get the index of this slot in its per-module free list. - fn per_module_index(&self) -> PerModuleFreeListIndex { - match self { - &Self::Affinity { - per_module_index, .. - } => per_module_index, - _ => panic!("per_module_index on slot with no affinity"), - } - } - - /// Update the index of this slot in its per-module free list. - fn update_per_module_index(&mut self, index: PerModuleFreeListIndex) { - match self { - &mut Self::Affinity { - ref mut per_module_index, - .. - } => { - *per_module_index = index; - } - _ => panic!("per_module_index on slot with no affinity"), - } - } + /// Metadata within the `warm` list of the main allocator. + unused_list_link: Link, } enum AllocMode { @@ -199,29 +117,14 @@ enum AllocMode { impl IndexAllocator { /// Create the default state for this strategy. - pub fn new(strategy: PoolingAllocationStrategy, max_instances: usize) -> Self { - let ids = (0..max_instances).map(|i| SlotId(i)).collect::<Vec<_>>(); - // Use a deterministic seed during fuzzing to improve reproducibility of - // test cases, but otherwise outside of fuzzing use a random seed to - // shake things up. - let seed = if cfg!(fuzzing) { - [0; 32] - } else { - rand::thread_rng().gen() - }; - let rng = SmallRng::from_seed(seed); + pub fn new(max_instances: u32, max_unused_warm_slots: u32) -> Self { IndexAllocator(Mutex::new(Inner { - rng, - strategy, - free_list: ids, - per_module: HashMap::new(), - slot_state: (0..max_instances) - .map(|i| { - SlotState::Free(FreeSlotState::NoAffinity { - free_list_index: GlobalFreeListIndex(i), - }) - }) - .collect(), + last_cold: 0, + max_unused_warm_slots, + unused_warm_slots: 0, + module_affine: HashMap::new(), + slot_state: (0..max_instances).map(|_| SlotState::UnusedCold).collect(), + warm: List::default(), })) } @@ -248,59 +151,51 @@ impl IndexAllocator { let mut inner = self.0.lock().unwrap(); let inner = &mut *inner; - // Determine which `SlotId` will be chosen first. Below the free list - // metadata will be updated with our choice. - let slot_id = match mode { - // If any slot is desired then the pooling allocation strategy - // determines which index is chosen.
- AllocMode::AnySlot => { - match inner.strategy { - PoolingAllocationStrategy::NextAvailable => inner.pick_last_used()?, - PoolingAllocationStrategy::Random => inner.pick_random()?, - // First attempt an affine allocation where the slot - // returned was previously used by `id`, but if that fails - // pick a random free slot ID. - // - // Note that we do this to maintain an unbiased stealing - // distribution: we want the likelihood of our taking a slot - // from some other module's freelist to be proportional to - // that module's freelist length. Or in other words, every - // *slot* should be equally likely to be stolen. The - // alternative, where we pick the victim module freelist - // first, means that either a module with an affinity - // freelist of one slot has the same chances of losing that - // slot as one with a hundred slots; or else we need a - // weighted random choice among modules, which is just as - // complex as this process. - // - // We don't bother picking an empty slot (no established - // affinity) before a random slot, because this is more - // complex, and in the steady state, all slots will see at - // least one instantiation very quickly, so there will never - // (past an initial phase) be a slot with no affinity. - PoolingAllocationStrategy::ReuseAffinity => inner - .pick_affine(module_id) - .or_else(|| inner.pick_random())?, + // As a first-pass always attempt an affine allocation. This will + // succeed if any slots are considered affine to `module_id` (if it's + // specified). Failing that, something else is attempted to be chosen. + let slot_id = inner.pick_affine(module_id).or_else(|| { + match mode { + // If any slot is requested then this is a normal instantiation + // looking for an index. Without any affine candidates there are + // two options here: + // + // 1. Pick a slot amongst previously allocated slots + // 2. Pick a slot that's never been used before + // + // The choice here is guided by the initial configuration of + // `max_unused_warm_slots`. If our count of unused warm slots, which are + // likely all affine, is below this threshold then the affinity + // of the warm slots isn't tampered with and first a cold slot + // is chosen. If the cold slot allocation fails, however, a warm + // slot is evicted. + // + // The opposite happens when we're above our threshold for the + // maximum number of warm slots, meaning that a warm slot is + // attempted to be picked from first with a cold slot following + // that. Note that the warm slot allocation in this case should + // only fail if `max_unused_warm_slots` is 0, otherwise + // `pick_warm` will always succeed. + AllocMode::AnySlot => { + if inner.unused_warm_slots < inner.max_unused_warm_slots { + inner.pick_cold().or_else(|| inner.pick_warm()) + } else { + inner.pick_warm().or_else(|| { + debug_assert!(inner.max_unused_warm_slots == 0); + inner.pick_cold() + }) + } + } + + // In this mode an affinity-based allocation is always performed + // as the purpose here is to clear out slots relevant to + // `module_id` during module teardown. This means that non-affine + // slots are not consulted in this path. + AllocMode::ForceAffineAndClear => None, } + })?; - // In this mode an affinity-based allocation is always performed as - // the purpose here is to clear out slots relevant to `module_id` - // during module teardown.
- AllocMode::ForceAffineAndClear => inner.pick_affine(module_id)?, - }; - - // Update internal metadata about the allocation of `slot_id` to - // `module_id`, meaning that it's removed from the per-module freelist - // if it was previously affine and additionally it's removed from the - // global freelist. - inner.remove_global_free_list_item(slot_id); - if let &SlotState::Free(FreeSlotState::Affinity { module, .. }) = - &inner.slot_state[slot_id.index()] - { - inner.remove_module_free_list_item(module, slot_id); - } - inner.slot_state[slot_id.index()] = SlotState::Taken(match mode { + inner.slot_state[slot_id.index()] = SlotState::Used(match mode { AllocMode::ForceAffineAndClear => None, AllocMode::AnySlot => module_id, }); @@ -310,24 +205,43 @@ impl IndexAllocator { pub(crate) fn free(&self, index: SlotId) { let mut inner = self.0.lock().unwrap(); - let free_list_index = GlobalFreeListIndex(inner.free_list.len()); - inner.free_list.push(index); - let module_id = inner.slot_state[index.index()].unwrap_module_id(); - inner.slot_state[index.index()] = if let Some(id) = module_id { - let per_module_list = inner - .per_module - .entry(id) - .or_insert_with(|| Vec::with_capacity(1)); - let per_module_index = PerModuleFreeListIndex(per_module_list.len()); - per_module_list.push(index); - SlotState::Free(FreeSlotState::Affinity { - module: id, - free_list_index, - per_module_index, - }) - } else { - SlotState::Free(FreeSlotState::NoAffinity { free_list_index }) + let inner = &mut *inner; + let module = match inner.slot_state[index.index()] { + SlotState::Used(module) => module, + _ => unreachable!(), }; + + // Bump the number of warm slots since this slot is now considered + // previously used. Afterwards append it to the linked list of all + // unused and warm slots. + inner.unused_warm_slots += 1; + let unused_list_link = inner + .warm + .append(index, &mut inner.slot_state, |s| &mut s.unused_list_link); + + let affine_list_link = match module { + // If this slot is affine to a particular module then append this + // index to the linked list for the affine module. Otherwise insert + // a new one-element linked list. + Some(module) => match inner.module_affine.entry(module) { + Entry::Occupied(mut e) => e + .get_mut() + .append(index, &mut inner.slot_state, |s| &mut s.affine_list_link), + Entry::Vacant(v) => { + v.insert(List::new(index)); + Link::default() + } + }, + + // If this slot has no affinity then the affine link is empty. 
+ None => Link::default(), + }; + + inner.slot_state[index.index()] = SlotState::UnusedWarm(Unused { + affinity: module, + affine_list_link, + unused_list_link, + }); } /// For testing only, we want to be able to assert what is on the @@ -335,7 +249,10 @@ impl IndexAllocator { #[cfg(test)] pub(crate) fn testing_freelist(&self) -> Vec<SlotId> { let inner = self.0.lock().unwrap(); - inner.free_list.clone() + inner + .warm + .iter(&inner.slot_state, |s| &s.unused_list_link) + .collect() } /// For testing only, get the list of all modules with at least @@ -343,102 +260,165 @@ impl IndexAllocator { #[cfg(test)] pub(crate) fn testing_module_affinity_list(&self) -> Vec<CompiledModuleId> { let inner = self.0.lock().unwrap(); - let mut ret = vec![]; - for (module, list) in inner.per_module.iter() { - assert!(!list.is_empty()); - ret.push(*module); - } - ret + inner.module_affine.keys().copied().collect() } } impl Inner { - fn pick_last_used(&self) -> Option<SlotId> { - self.free_list.last().copied() - } - - fn pick_random(&mut self) -> Option<SlotId> { - if self.free_list.len() == 0 { - return None; - } - let i = self.rng.gen_range(0..self.free_list.len()); - Some(self.free_list[i]) - } - /// Attempts to allocate a slot already affine to `id`, returning `None` if /// `id` is `None` or if there are no affine slots. - fn pick_affine(&self, module_id: Option<CompiledModuleId>) -> Option<SlotId> { - let free = self.per_module.get(&module_id?)?; - free.last().copied() + fn pick_affine(&mut self, module_id: Option<CompiledModuleId>) -> Option<SlotId> { + // Note that the `tail` of the affine list is chosen here as it's the + // most recently used, which for affine allocations is what we want -- + // maximizing temporal reuse. + let ret = self.module_affine.get(&module_id?)?.tail?; + self.remove(ret); + Some(ret) } - /// Remove a slot-index from the global free list. - fn remove_global_free_list_item(&mut self, index: SlotId) { - let free_list_index = self.slot_state[index.index()] - .unwrap_free() - .free_list_index(); - assert_eq!(index, self.free_list.swap_remove(free_list_index.index())); - if free_list_index.index() < self.free_list.len() { - let replaced = self.free_list[free_list_index.index()]; - self.slot_state[replaced.index()] - .unwrap_free_mut() - .update_free_list_index(free_list_index); + fn pick_warm(&mut self) -> Option<SlotId> { + // Insertions into the `unused` list happen at the `tail`, so the + // least-recently-used item will be at the head. That's our goal here: + // pick the least-recently-used slot since something "warm" is being + // evicted anyway. + let head = self.warm.head?; + self.remove(head); + Some(head) + } + + fn remove(&mut self, slot: SlotId) { + // Decrement the size of the warm list, and additionally remove it from + // the `warm` linked list. + self.unused_warm_slots -= 1; + self.warm + .remove(slot, &mut self.slot_state, |u| &mut u.unused_list_link); + + // If this slot is affine to a module then additionally remove it from + // that module's affinity linked list. Note that if the module's affine + // list is empty then the module's entry in the map is completely + // removed as well. + let module = self.slot_state[slot.index()].unwrap_unused().affinity; + if let Some(module) = module { + let mut list = match self.module_affine.entry(module) { + Entry::Occupied(e) => e, + Entry::Vacant(_) => unreachable!(), + }; + list.get_mut() + .remove(slot, &mut self.slot_state, |u| &mut u.affine_list_link); + + if list.get_mut().head.is_none() { + list.remove(); + } } } - /// Remove a slot-index from a per-module free list.
- fn remove_module_free_list_item(&mut self, module_id: CompiledModuleId, index: SlotId) { - debug_assert!( - self.per_module.contains_key(&module_id), - "per_module list for given module should not be empty" - ); - - let per_module_list = self.per_module.get_mut(&module_id).unwrap(); - debug_assert!(!per_module_list.is_empty()); - - let per_module_index = self.slot_state[index.index()] - .unwrap_free() - .per_module_index(); - assert_eq!(index, per_module_list.swap_remove(per_module_index.index())); - if per_module_index.index() < per_module_list.len() { - let replaced = per_module_list[per_module_index.index()]; - self.slot_state[replaced.index()] - .unwrap_free_mut() - .update_per_module_index(per_module_index); + fn pick_cold(&mut self) -> Option { + if (self.last_cold as usize) == self.slot_state.len() { + None + } else { + let ret = Some(SlotId(self.last_cold)); + self.last_cold += 1; + ret } - if per_module_list.is_empty() { - self.per_module.remove(&module_id); + } +} + +impl List { + /// Creates a new one-element list pointing at `id`. + fn new(id: SlotId) -> List { + List { + head: Some(id), + tail: Some(id), } } + + /// Appends the `id` to this list whose links are determined by `link`. + fn append( + &mut self, + id: SlotId, + states: &mut [SlotState], + link: fn(&mut Unused) -> &mut Link, + ) -> Link { + // This `id` is the new tail... + let tail = mem::replace(&mut self.tail, Some(id)); + + // If the tail was present, then update its `next` field to ourselves as + // we've been appended, otherwise update the `head` since the list was + // previously empty. + match tail { + Some(tail) => link(states[tail.index()].unwrap_unused()).next = Some(id), + None => self.head = Some(id), + } + Link { + prev: tail, + next: None, + } + } + + /// Removes `id` from this list whose links are determined by `link`. + fn remove( + &mut self, + id: SlotId, + slot_state: &mut [SlotState], + link: fn(&mut Unused) -> &mut Link, + ) -> Unused { + let mut state = *slot_state[id.index()].unwrap_unused(); + let next = link(&mut state).next; + let prev = link(&mut state).prev; + + // If a `next` node is present for this link, then its previous was our + // own previous now. Otherwise we are the tail so the new tail is our + // previous. + match next { + Some(next) => link(slot_state[next.index()].unwrap_unused()).prev = prev, + None => self.tail = prev, + } + + // Same as the `next` node, except everything is in reverse. 
+ match prev { + Some(prev) => link(slot_state[prev.index()].unwrap_unused()).next = next, + None => self.head = next, + } + state + } + + #[cfg(test)] + fn iter<'a>( + &'a self, + states: &'a [SlotState], + link: fn(&Unused) -> &Link, + ) -> impl Iterator + 'a { + let mut cur = self.head; + let mut prev = None; + std::iter::from_fn(move || { + if cur.is_none() { + assert_eq!(prev, self.tail); + } + let ret = cur?; + match &states[ret.index()] { + SlotState::UnusedWarm(u) => { + assert_eq!(link(u).prev, prev); + prev = Some(ret); + cur = link(u).next + } + _ => unreachable!(), + } + Some(ret) + }) + } } #[cfg(test)] mod test { use super::{IndexAllocator, SlotId}; use crate::CompiledModuleIdAllocator; - use crate::PoolingAllocationStrategy; #[test] fn test_next_available_allocation_strategy() { - let strat = PoolingAllocationStrategy::NextAvailable; - for size in 0..20 { - let state = IndexAllocator::new(strat, size); + let state = IndexAllocator::new(size, 0); for i in 0..size { - assert_eq!(state.alloc(None).unwrap().index(), size - i - 1); - } - assert!(state.alloc(None).is_none()); - } - } - - #[test] - fn test_random_allocation_strategy() { - let strat = PoolingAllocationStrategy::Random; - - for size in 0..20 { - let state = IndexAllocator::new(strat, size); - for _ in 0..size { - assert!(state.alloc(None).unwrap().index() < size); + assert_eq!(state.alloc(None).unwrap().index(), i as usize); } assert!(state.alloc(None).is_none()); } @@ -446,16 +426,15 @@ mod test { #[test] fn test_affinity_allocation_strategy() { - let strat = PoolingAllocationStrategy::ReuseAffinity; let id_alloc = CompiledModuleIdAllocator::new(); let id1 = id_alloc.alloc(); let id2 = id_alloc.alloc(); - let state = IndexAllocator::new(strat, 100); + let state = IndexAllocator::new(100, 100); let index1 = state.alloc(Some(id1)).unwrap(); - assert!(index1.index() < 100); + assert_eq!(index1.index(), 0); let index2 = state.alloc(Some(id2)).unwrap(); - assert!(index2.index() < 100); + assert_eq!(index2.index(), 1); assert_ne!(index1, index2); state.free(index1); @@ -503,12 +482,8 @@ mod test { let id_alloc = CompiledModuleIdAllocator::new(); let id = id_alloc.alloc(); - for strat in [ - PoolingAllocationStrategy::ReuseAffinity, - PoolingAllocationStrategy::NextAvailable, - PoolingAllocationStrategy::Random, - ] { - let state = IndexAllocator::new(strat, 100); + for max_unused_warm_slots in [0, 1, 2] { + let state = IndexAllocator::new(100, max_unused_warm_slots); let index1 = state.alloc(Some(id)).unwrap(); let index2 = state.alloc(Some(id)).unwrap(); @@ -525,12 +500,11 @@ mod test { use rand::Rng; let mut rng = rand::thread_rng(); - let strat = PoolingAllocationStrategy::ReuseAffinity; let id_alloc = CompiledModuleIdAllocator::new(); let ids = std::iter::repeat_with(|| id_alloc.alloc()) .take(10) .collect::>(); - let state = IndexAllocator::new(strat, 1000); + let state = IndexAllocator::new(1000, 1000); let mut allocated: Vec = vec![]; let mut last_id = vec![None; 1000]; @@ -566,4 +540,59 @@ mod test { hits ); } + + #[test] + fn test_affinity_threshold() { + let id_alloc = CompiledModuleIdAllocator::new(); + let id1 = id_alloc.alloc(); + let id2 = id_alloc.alloc(); + let id3 = id_alloc.alloc(); + let state = IndexAllocator::new(10, 2); + + // Set some slot affinities + assert_eq!(state.alloc(Some(id1)), Some(SlotId(0))); + state.free(SlotId(0)); + assert_eq!(state.alloc(Some(id2)), Some(SlotId(1))); + state.free(SlotId(1)); + + // Only 2 slots are allowed to be unused and warm, so we're at our + // threshold, 
meaning one must now be evicted. + assert_eq!(state.alloc(Some(id3)), Some(SlotId(0))); + state.free(SlotId(0)); + + // pickup `id2` again, it should be affine. + assert_eq!(state.alloc(Some(id2)), Some(SlotId(1))); + + // with only one warm slot available allocation for `id1` should pick a + // fresh slot + assert_eq!(state.alloc(Some(id1)), Some(SlotId(2))); + + state.free(SlotId(1)); + state.free(SlotId(2)); + + // ensure everything stays affine + assert_eq!(state.alloc(Some(id1)), Some(SlotId(2))); + assert_eq!(state.alloc(Some(id2)), Some(SlotId(1))); + assert_eq!(state.alloc(Some(id3)), Some(SlotId(0))); + + state.free(SlotId(1)); + state.free(SlotId(2)); + state.free(SlotId(0)); + + // LRU is 1, so that should be picked + assert_eq!(state.alloc(Some(id_alloc.alloc())), Some(SlotId(1))); + + // Pick another LRU entry, this time 2 + assert_eq!(state.alloc(Some(id_alloc.alloc())), Some(SlotId(2))); + + // This should preserve slot `0` and pick up something new + assert_eq!(state.alloc(Some(id_alloc.alloc())), Some(SlotId(3))); + + state.free(SlotId(1)); + state.free(SlotId(2)); + state.free(SlotId(3)); + + // for good measure make sure id3 is still affine + assert_eq!(state.alloc(Some(id3)), Some(SlotId(0))); + } } diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index d74363d132..1a1894e7da 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -56,8 +56,7 @@ pub use crate::instance::{ }; #[cfg(feature = "pooling-allocator")] pub use crate::instance::{ - InstanceLimits, PoolingAllocationStrategy, PoolingInstanceAllocator, - PoolingInstanceAllocatorConfig, + InstanceLimits, PoolingInstanceAllocator, PoolingInstanceAllocatorConfig, }; pub use crate::memory::{ DefaultMemoryCreator, Memory, RuntimeLinearMemory, RuntimeMemoryCreator, SharedMemory, @@ -156,7 +155,7 @@ pub unsafe trait Store { /// is chiefly needed for lazy initialization of various bits of /// instance state. /// -/// When an instance is created, it holds an Arc +/// When an instance is created, it holds an `Arc` /// so that it can get to signatures, metadata on functions, memory and /// funcref-table images, etc. All of these things are ordinarily known /// by the higher-level layers of Wasmtime. Specifically, the main diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 2f699450f6..b69e0b0384 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -1712,17 +1712,61 @@ pub struct PoolingAllocationConfig { config: wasmtime_runtime::PoolingInstanceAllocatorConfig, } -#[cfg(feature = "pooling-allocator")] -pub use wasmtime_runtime::PoolingAllocationStrategy; - #[cfg(feature = "pooling-allocator")] impl PoolingAllocationConfig { - /// Configures the method by which slots in the pooling allocator are - /// allocated to instances + /// Configures the maximum number of "unused warm slots" to retain in the + /// pooling allocator. /// - /// This defaults to [`PoolingAllocationStrategy::ReuseAffinity`] . - pub fn strategy(&mut self, strategy: PoolingAllocationStrategy) -> &mut Self { - self.config.strategy = strategy; + /// The pooling allocator operates over slots to allocate from, and each + /// slot is considered "cold" if it's never been used before or "warm" if + /// it's been used by some module in the past. Slots in the pooling + /// allocator additionally track an "affinity" flag to a particular core + /// wasm module. 
When a module is instantiated into a slot then the slot is + /// considered affine to that module, even after the instance has been + /// deallocated. + /// + /// When a new instance is created then a slot must be chosen, and the + /// current algorithm for selecting a slot is: + /// + /// * If there are slots that are affine to the module being instantiated, + /// then the most recently used slot is selected to be allocated from. + /// This is done to improve reuse of resources such as memory mappings and + /// additionally to try to benefit from temporal locality for things like + /// caches. + /// + /// * Otherwise if there are more than N affine slots to other modules, then + /// one of those affine slots is chosen to be allocated. The slot chosen + /// is picked on a least-recently-used basis. + /// + /// * Finally, if there are fewer than N affine slots to other modules, then + /// the non-affine slots are allocated from. + /// + /// This setting, `max_unused_warm_slots`, is the value for N in the above + /// algorithm. The purpose of this setting is to have a knob over the RSS + /// impact of "unused slots" for a long-running wasm server. + /// + /// If this setting is set to 0, for example, then affine slots are + /// aggressively reused on a least-recently-used basis. A "cold" slot is + /// only used if there are no affine slots available to allocate from. This + /// means that the set of slots used over the lifetime of a program is the + /// same as the maximum concurrent number of wasm instances. + /// + /// If this setting is set to infinity, however, then cold slots are + /// prioritized to be allocated from. This means that the set of slots used + /// over the lifetime of a program will approach + /// [`PoolingAllocationConfig::instance_count`], or the maximum number of + /// slots in the pooling allocator. + /// + /// Wasmtime does not aggressively decommit all resources associated with a + /// slot when the slot is not in use. For example, the + /// [`PoolingAllocationConfig::linear_memory_keep_resident`] option can be + /// used to keep memory associated with a slot, even when it's not in use. + /// This means that the total set of used slots in the pooling instance + /// allocator can impact the overall RSS usage of a program. + /// + /// The default value for this option is 100. + pub fn max_unused_warm_slots(&mut self, max: u32) -> &mut Self { + self.config.max_unused_warm_slots = max; + self }
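For a sense of how the new knob is driven from an embedding, here is a minimal sketch against the public `wasmtime` API (not part of this patch). It assumes the `pooling-allocator` feature is enabled and that the embedder wires the pooling config in through `Config::allocation_strategy` with `InstanceAllocationStrategy::Pooling`, which is how this `PoolingAllocationConfig` is consumed in the Wasmtime release this change targets; `instance_count` and `max_unused_warm_slots` are the builder methods shown in the diff above.

// Illustrative sketch only; error handling via anyhow as in typical
// Wasmtime examples.
use anyhow::Result;
use wasmtime::{Config, Engine, InstanceAllocationStrategy, PoolingAllocationConfig};

fn main() -> Result<()> {
    let mut pool = PoolingAllocationConfig::default();
    // Allow up to 1000 concurrent instance slots in the pool ...
    pool.instance_count(1000);
    // ... but once more than 10 previously used ("warm") slots sit idle,
    // reuse the least-recently-used warm slot instead of touching a cold
    // one, bounding the RSS contributed by idle slots.
    pool.max_unused_warm_slots(10);

    let mut config = Config::new();
    config.allocation_strategy(InstanceAllocationStrategy::Pooling(pool));
    let _engine = Engine::new(&config)?;
    Ok(())
}

A value of 0 forces maximal reuse of warm slots (the steady-state slot set equals peak concurrency), while a very large value keeps every module's slots warm at the cost of RSS, matching the trade-off described in the doc comment above.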