From 2b52f47b83efd1aba5efbeb2608095a57dceb96e Mon Sep 17 00:00:00 2001
From: Andrew Brown
Date: Wed, 8 Jun 2022 10:13:40 -0700
Subject: [PATCH] Add shared memories (#4187)

* Add shared memories

This change adds the ability to use shared memories in Wasmtime when the
[threads proposal] is enabled. Shared memories are annotated as `shared` in
the WebAssembly syntax, e.g., `(memory 1 1 shared)`, and are protected from
concurrent access during `memory.size` and `memory.grow`.

[threads proposal]: https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md

In order to implement this in Wasmtime, there are two main cases to cover:

- a program may simply create a shared memory and possibly export it; this
  means that Wasmtime itself must be able to create shared memories
- a user may create a shared memory externally and pass it in as an import
  during instantiation; this is the case when the program contains code like
  `(import "env" "memory" (memory 1 1 shared))`--this case is handled by a
  new Wasmtime API type--`SharedMemory`

Because of the first case, this change allows any of the current
memory-creation mechanisms to work as-is. Wasmtime can still create either
static or dynamic memories in either on-demand or pooling modes, and any of
these memories can be considered shared. When shared, the `Memory` runtime
container will lock appropriately during `memory.size` and `memory.grow`
operations; since all memories use this container, it is an ideal place for
implementing the locking once and once only.

The second case is covered by the new `SharedMemory` structure. It uses the
same `Mmap` allocation under the hood as non-shared memories, but allows the
user to perform the allocation externally to Wasmtime and share the memory
across threads (via an `Arc`). The pointer to the actual memory is carefully
wired through and owned by the `SharedMemory` structure itself. This means
that there are differing views of where to access the pointer (i.e., the
`VMMemoryDefinition`): for owned memories (the default), the
`VMMemoryDefinition` is stored directly by the `VMContext`; in the
`SharedMemory` case, however, the `VMContext` must point to this separate
structure.

To ensure that the `VMContext` can always point to the correct
`VMMemoryDefinition`, this change alters the `VMContext` structure. Since a
`SharedMemory` owns its own `VMMemoryDefinition`, the `defined_memories`
table in the `VMContext` becomes a sequence of pointers--in the shared
memory case, they point to the `VMMemoryDefinition` owned by the
`SharedMemory` and in the owned memory case (i.e., not shared) they point to
`VMMemoryDefinition`s stored in a new table, `owned_memories`.

This change adds an additional indirection (through the `*mut
VMMemoryDefinition` pointer) that could add overhead. Using an imported
memory as a proxy, we measured a 1-3% overhead of this approach on the
`pulldown-cmark` benchmark. To avoid this, Cranelift-generated code will
special-case the owned memory access (i.e., load a pointer directly to the
`owned_memories` entry) for `memory.size` so that only shared memories (and
imported memories, as before) incur the indirection cost.
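As a concrete illustration of the second case, the sketch below imports an
externally created shared memory into a module. This is a sketch only: the
`MemoryType::shared(..)` and `SharedMemory::new(&engine, ..)` constructor
signatures are assumptions inferred from this diff (the
`crates/wasmtime/src/memory.rs` and `crates/wasmtime/src/types.rs` hunks are
not shown in full below), while `Instance::get_shared_memory`,
`From<SharedMemory> for Extern`, and `SharedMemory::ty` do appear in the
patch.

```rust
// Sketch under assumed signatures; see the wasmtime crate docs for the
// final embedder API.
use wasmtime::{Config, Engine, Instance, MemoryType, Module, SharedMemory, Store};

fn main() -> anyhow::Result<()> {
    let mut config = Config::new();
    // Shared memories are only valid when the threads proposal is enabled.
    config.wasm_threads(true);
    let engine = Engine::new(&config)?;

    // Create the shared memory externally and pass it in as an import
    // during instantiation (the second case above).
    let memory = SharedMemory::new(&engine, MemoryType::shared(1, 2))?;

    let module = Module::new(
        &engine,
        r#"(module
             (import "env" "memory" (memory 1 2 shared))
             (func (export "grow") (result i32)
               (memory.grow (i32.const 1))))"#,
    )?;

    let mut store = Store::new(&engine, ());
    let instance = Instance::new(&mut store, &module, &[memory.into()])?;

    // `memory.grow` takes the lock and atomically publishes the new
    // `current_length`, so `memory.size` on other threads observes a
    // consistent value.
    let grow = instance.get_typed_func::<(), i32, _>(&mut store, "grow")?;
    assert_eq!(grow.call(&mut store, ())?, 1); // returns the old size, in pages
    Ok(())
}
```

Because the runtime-side `SharedMemory` is an `Arc` around the underlying
allocation, cloning the handle and importing the clone into an instance on
another thread aliases the same linear memory rather than copying it.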
* review: remove thread feature check * review: swap wasmtime-types dependency for existing wasmtime-environ use * review: remove unused VMMemoryUnion * review: reword cross-engine error message * review: improve tests * review: refactor to separate prevent Memory <-> SharedMemory conversion * review: into_shared_memory -> as_shared_memory * review: remove commented out code * review: limit shared min/max to 32 bits * review: skip imported memories * review: imported memories are not owned * review: remove TODO * review: document unsafe send + sync * review: add limiter assertion * review: remove TODO * review: improve tests * review: fix doc test * fix: fixes based on discussion with Alex This changes several key parts: - adds memory indexes to imports and exports - makes `VMMemoryDefinition::current_length` an atomic usize * review: add `Extern::SharedMemory` * review: remove TODO * review: atomically load from VMMemoryDescription in JIT-generated code * review: add test probing the last available memory slot across threads * fix: move assertion to new location due to rebase * fix: doc link * fix: add TODOs to c-api * fix: broken doc link * fix: modify pooling allocator messages in tests * review: make owned_memory_index panic instead of returning an option * review: clarify calculation of num_owned_memories * review: move 'use' to top of file * review: change '*const [u8]' to '*mut [u8]' * review: remove TODO * review: avoid hard-coding memory index * review: remove 'preallocation' parameter from 'Memory::_new' * fix: component model memory length * review: check that shared memory plans are static * review: ignore growth limits for shared memory * review: improve atomic store comment * review: add FIXME for memory growth failure * review: add comment about absence of bounds-checked 'memory.size' * review: make 'current_length()' doc comment more precise * review: more comments related to memory.size non-determinism * review: make 'vmmemory' unreachable for shared memory * review: move code around * review: thread plan through to 'wrap()' * review: disallow shared memory allocation with the pooling allocator --- crates/c-api/src/extern.rs | 2 + crates/cranelift/src/compiler.rs | 14 +- crates/cranelift/src/func_environ.rs | 100 +++-- crates/environ/src/module.rs | 28 +- crates/environ/src/module_environ.rs | 6 - crates/environ/src/vmoffsets.rs | 62 ++- crates/runtime/src/export.rs | 4 +- crates/runtime/src/externref.rs | 3 + crates/runtime/src/instance.rs | 98 ++--- crates/runtime/src/instance/allocator.rs | 52 ++- crates/runtime/src/lib.rs | 8 +- crates/runtime/src/libcalls.rs | 2 +- crates/runtime/src/memory.rs | 370 ++++++++++++++---- crates/runtime/src/vmcontext.rs | 36 +- crates/types/src/lib.rs | 5 + crates/wasmtime/src/component/func/options.rs | 4 +- crates/wasmtime/src/externals.rs | 33 +- crates/wasmtime/src/instance.rs | 25 +- crates/wasmtime/src/limits.rs | 2 +- crates/wasmtime/src/memory.rs | 190 ++++++++- crates/wasmtime/src/trampoline.rs | 7 +- crates/wasmtime/src/trampoline/memory.rs | 73 +++- crates/wasmtime/src/types.rs | 27 ++ crates/wasmtime/src/types/matching.rs | 5 + tests/all/main.rs | 1 + tests/all/pooling_allocator.rs | 11 +- tests/all/threads.rs | 269 +++++++++++++ 27 files changed, 1211 insertions(+), 226 deletions(-) create mode 100644 tests/all/threads.rs diff --git a/crates/c-api/src/extern.rs b/crates/c-api/src/extern.rs index 106d9ad306..516e21b284 100644 --- a/crates/c-api/src/extern.rs +++ b/crates/c-api/src/extern.rs @@ -20,6 +20,7 @@ pub extern "C" fn 
wasm_extern_kind(e: &wasm_extern_t) -> wasm_externkind_t { Extern::Global(_) => crate::WASM_EXTERN_GLOBAL, Extern::Table(_) => crate::WASM_EXTERN_TABLE, Extern::Memory(_) => crate::WASM_EXTERN_MEMORY, + Extern::SharedMemory(_) => todo!(), } } @@ -119,6 +120,7 @@ impl From for wasmtime_extern_t { kind: WASMTIME_EXTERN_MEMORY, of: wasmtime_extern_union { memory }, }, + Extern::SharedMemory(_memory) => todo!(), } } } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 6dcdeeac1a..9985d0afd0 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -16,7 +16,7 @@ use cranelift_codegen::{MachSrcLoc, MachStackMap}; use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_frontend::FunctionBuilder; use cranelift_wasm::{ - DefinedFuncIndex, DefinedMemoryIndex, FuncIndex, FuncTranslator, MemoryIndex, SignatureIndex, + DefinedFuncIndex, FuncIndex, FuncTranslator, MemoryIndex, OwnedMemoryIndex, SignatureIndex, WasmFuncType, }; use object::write::{Object, StandardSegment, SymbolId}; @@ -711,8 +711,18 @@ impl Compiler { let memory_offset = if ofs.num_imported_memories > 0 { ModuleMemoryOffset::Imported(ofs.vmctx_vmmemory_import(MemoryIndex::new(0))) } else if ofs.num_defined_memories > 0 { + // The addition of shared memory makes the following assumption, + // "owned memory index = 0", possibly false. If the first memory + // is a shared memory, the base pointer will not be stored in + // the `owned_memories` array. The following code should + // eventually be fixed to not only handle shared memories but + // also multiple memories. + assert_eq!( + ofs.num_defined_memories, ofs.num_owned_memories, + "the memory base pointer may be incorrect due to sharing memory" + ); ModuleMemoryOffset::Defined( - ofs.vmctx_vmmemory_definition_base(DefinedMemoryIndex::new(0)), + ofs.vmctx_vmmemory_definition_base(OwnedMemoryIndex::new(0)), ) } else { ModuleMemoryOffset::None diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 8c64cd62a9..2108f3193c 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -1368,18 +1368,37 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m fn make_heap(&mut self, func: &mut ir::Function, index: MemoryIndex) -> WasmResult { let pointer_type = self.pointer_type(); - + let is_shared = self.module.memory_plans[index].memory.shared; let (ptr, base_offset, current_length_offset) = { let vmctx = self.vmctx(func); if let Some(def_index) = self.module.defined_memory_index(index) { - let base_offset = - i32::try_from(self.offsets.vmctx_vmmemory_definition_base(def_index)).unwrap(); - let current_length_offset = i32::try_from( - self.offsets - .vmctx_vmmemory_definition_current_length(def_index), - ) - .unwrap(); - (vmctx, base_offset, current_length_offset) + if is_shared { + // As with imported memory, the `VMMemoryDefinition` for a + // shared memory is stored elsewhere. We store a `*mut + // VMMemoryDefinition` to it and dereference that when + // atomically growing it. 
+ let from_offset = self.offsets.vmctx_vmmemory_pointer(def_index); + let memory = func.create_global_value(ir::GlobalValueData::Load { + base: vmctx, + offset: Offset32::new(i32::try_from(from_offset).unwrap()), + global_type: pointer_type, + readonly: true, + }); + let base_offset = i32::from(self.offsets.vmmemory_definition_base()); + let current_length_offset = + i32::from(self.offsets.vmmemory_definition_current_length()); + (memory, base_offset, current_length_offset) + } else { + let owned_index = self.module.owned_memory_index(def_index); + let owned_base_offset = + self.offsets.vmctx_vmmemory_definition_base(owned_index); + let owned_length_offset = self + .offsets + .vmctx_vmmemory_definition_current_length(owned_index); + let current_base_offset = i32::try_from(owned_base_offset).unwrap(); + let current_length_offset = i32::try_from(owned_length_offset).unwrap(); + (vmctx, current_base_offset, current_length_offset) + } } else { let from_offset = self.offsets.vmctx_vmmemory_import_from(index); let memory = func.create_global_value(ir::GlobalValueData::Load { @@ -1693,28 +1712,65 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m ) -> WasmResult { let pointer_type = self.pointer_type(); let vmctx = self.vmctx(&mut pos.func); + let is_shared = self.module.memory_plans[index].memory.shared; let base = pos.ins().global_value(pointer_type, vmctx); let current_length_in_bytes = match self.module.defined_memory_index(index) { Some(def_index) => { - let offset = i32::try_from( - self.offsets - .vmctx_vmmemory_definition_current_length(def_index), - ) - .unwrap(); - pos.ins() - .load(pointer_type, ir::MemFlags::trusted(), base, offset) + if is_shared { + let offset = + i32::try_from(self.offsets.vmctx_vmmemory_pointer(def_index)).unwrap(); + let vmmemory_ptr = + pos.ins() + .load(pointer_type, ir::MemFlags::trusted(), base, offset); + let vmmemory_definition_offset = + i64::from(self.offsets.vmmemory_definition_current_length()); + let vmmemory_definition_ptr = + pos.ins().iadd_imm(vmmemory_ptr, vmmemory_definition_offset); + // This atomic access of the + // `VMMemoryDefinition::current_length` is direct; no bounds + // check is needed. This is possible because shared memory + // has a static size (the maximum is always known). Shared + // memory is thus built with a static memory plan and no + // bounds-checked version of this is implemented. 
+ pos.ins().atomic_load( + pointer_type, + ir::MemFlags::trusted(), + vmmemory_definition_ptr, + ) + } else { + let owned_index = self.module.owned_memory_index(def_index); + let offset = i32::try_from( + self.offsets + .vmctx_vmmemory_definition_current_length(owned_index), + ) + .unwrap(); + pos.ins() + .load(pointer_type, ir::MemFlags::trusted(), base, offset) + } } None => { let offset = i32::try_from(self.offsets.vmctx_vmmemory_import_from(index)).unwrap(); let vmmemory_ptr = pos.ins() .load(pointer_type, ir::MemFlags::trusted(), base, offset); - pos.ins().load( - pointer_type, - ir::MemFlags::trusted(), - vmmemory_ptr, - i32::from(self.offsets.vmmemory_definition_current_length()), - ) + if is_shared { + let vmmemory_definition_offset = + i64::from(self.offsets.vmmemory_definition_current_length()); + let vmmemory_definition_ptr = + pos.ins().iadd_imm(vmmemory_ptr, vmmemory_definition_offset); + pos.ins().atomic_load( + pointer_type, + ir::MemFlags::trusted(), + vmmemory_definition_ptr, + ) + } else { + pos.ins().load( + pointer_type, + ir::MemFlags::trusted(), + vmmemory_ptr, + i32::from(self.offsets.vmmemory_definition_current_length()), + ) + } } }; let current_length_in_pages = pos diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index 6b5b7ba91c..c937af024b 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -10,7 +10,7 @@ use std::mem; use std::ops::Range; use wasmtime_types::*; -/// Implemenation styles for WebAssembly linear memory. +/// Implementation styles for WebAssembly linear memory. #[derive(Debug, Clone, Hash, Serialize, Deserialize)] pub enum MemoryStyle { /// The actual memory can be resized and moved. @@ -18,7 +18,7 @@ pub enum MemoryStyle { /// Extra space to reserve when a memory must be moved due to growth. reserve: u64, }, - /// Addresss space is allocated up front. + /// Address space is allocated up front. Static { /// The number of mapped and unmapped pages. bound: u64, @@ -160,7 +160,7 @@ pub enum MemoryInitialization { /// which might reside in a compiled module on disk, available immediately /// in a linear memory's address space. /// - /// To facilitate the latter fo these techniques the `try_static_init` + /// To facilitate the latter of these techniques the `try_static_init` /// function below, which creates this variant, takes a host page size /// argument which can page-align everything to make mmap-ing possible. Static { @@ -919,6 +919,28 @@ impl Module { } } + /// Convert a `DefinedMemoryIndex` into an `OwnedMemoryIndex`. Returns None + /// if the index is an imported memory. + #[inline] + pub fn owned_memory_index(&self, memory: DefinedMemoryIndex) -> OwnedMemoryIndex { + assert!( + memory.index() < self.memory_plans.len(), + "non-shared memory must have an owned index" + ); + + // Once we know that the memory index is not greater than the number of + // plans, we can iterate through the plans up to the memory index and + // count how many are not shared (i.e., owned). + let owned_memory_index = self + .memory_plans + .iter() + .skip(self.num_imported_memories) + .take(memory.index()) + .filter(|(_, mp)| !mp.memory.shared) + .count(); + OwnedMemoryIndex::new(owned_memory_index) + } + /// Test whether the given memory index is for an imported memory. 
#[inline] pub fn is_imported_memory(&self, index: MemoryIndex) -> bool { diff --git a/crates/environ/src/module_environ.rs b/crates/environ/src/module_environ.rs index 0df931c3be..c5e6f7ca37 100644 --- a/crates/environ/src/module_environ.rs +++ b/crates/environ/src/module_environ.rs @@ -240,9 +240,6 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { EntityType::Function(sig_index) } TypeRef::Memory(ty) => { - if ty.shared { - return Err(WasmError::Unsupported("shared memories".to_owned())); - } self.result.module.num_imported_memories += 1; EntityType::Memory(ty.into()) } @@ -296,9 +293,6 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { for entry in memories { let memory = entry?; - if memory.shared { - return Err(WasmError::Unsupported("shared memories".to_owned())); - } let plan = MemoryPlan::for_memory(memory.into(), &self.tunables); self.result.module.memory_plans.push(plan); } diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs index ad579d9fd9..cee03503ff 100644 --- a/crates/environ/src/vmoffsets.rs +++ b/crates/environ/src/vmoffsets.rs @@ -15,7 +15,8 @@ // imported_memories: [VMMemoryImport; module.num_imported_memories], // imported_globals: [VMGlobalImport; module.num_imported_globals], // tables: [VMTableDefinition; module.num_defined_tables], -// memories: [VMMemoryDefinition; module.num_defined_memories], +// memories: [*mut VMMemoryDefinition; module.num_defined_memories], +// owned_memories: [VMMemoryDefinition; module.num_owned_memories], // globals: [VMGlobalDefinition; module.num_defined_globals], // anyfuncs: [VMCallerCheckedAnyfunc; module.num_escaped_funcs], // } @@ -27,6 +28,7 @@ use crate::{ use cranelift_entity::packed_option::ReservedValue; use more_asserts::assert_lt; use std::convert::TryFrom; +use wasmtime_types::OwnedMemoryIndex; /// Sentinel value indicating that wasm has been interrupted. // Note that this has a bit of an odd definition. See the `insert_stack_check` @@ -66,6 +68,8 @@ pub struct VMOffsets
<P>
{ pub num_defined_tables: u32, /// The number of defined memories in the module. pub num_defined_memories: u32, + /// The number of memories owned by the module instance. + pub num_owned_memories: u32, /// The number of defined globals in the module. pub num_defined_globals: u32, /// The number of escaped functions in the module, the size of the anyfuncs @@ -86,6 +90,7 @@ pub struct VMOffsets
<P>
{ imported_globals: u32, defined_tables: u32, defined_memories: u32, + owned_memories: u32, defined_globals: u32, defined_anyfuncs: u32, size: u32, @@ -157,9 +162,11 @@ pub struct VMOffsetsFields
<P>
{ pub num_defined_tables: u32, /// The number of defined memories in the module. pub num_defined_memories: u32, + /// The number of memories owned by the module instance. + pub num_owned_memories: u32, /// The number of defined globals in the module. pub num_defined_globals: u32, - /// The numbe of escaped functions in the module, the size of the anyfunc + /// The number of escaped functions in the module, the size of the anyfunc /// array. pub num_escaped_funcs: u32, } @@ -167,6 +174,14 @@ pub struct VMOffsetsFields
<P>
{ impl VMOffsets
<P>
{ /// Return a new `VMOffsets` instance, for a given pointer size. pub fn new(ptr: P, module: &Module) -> Self { + let num_owned_memories = module + .memory_plans + .iter() + .skip(module.num_imported_memories) + .filter(|p| !p.1.memory.shared) + .count() + .try_into() + .unwrap(); VMOffsets::from(VMOffsetsFields { ptr, num_imported_functions: cast_to_u32(module.num_imported_funcs), @@ -177,6 +192,7 @@ impl VMOffsets
<P>
{ num_defined_memories: cast_to_u32( module.memory_plans.len() - module.num_imported_memories, ), + num_owned_memories, num_defined_globals: cast_to_u32(module.globals.len() - module.num_imported_globals), num_escaped_funcs: cast_to_u32(module.num_escaped_funcs), }) @@ -206,12 +222,13 @@ impl VMOffsets
<P>
{ num_defined_tables: _, num_defined_globals: _, num_defined_memories: _, + num_owned_memories: _, num_escaped_funcs: _, // used as the initial size below size, - // exhaustively match teh rest of the fields with input from + // exhaustively match the rest of the fields with input from // the macro $($name,)* } = *self; @@ -235,6 +252,7 @@ impl VMOffsets
<P>
{ defined_anyfuncs: "module functions", defined_globals: "defined globals", defined_memories: "defined memories", + owned_memories: "owned memories", defined_tables: "defined tables", imported_globals: "imported globals", imported_memories: "imported memories", @@ -261,6 +279,7 @@ impl From> for VMOffsets
<P>
{ num_imported_globals: fields.num_imported_globals, num_defined_tables: fields.num_defined_tables, num_defined_memories: fields.num_defined_memories, + num_owned_memories: fields.num_owned_memories, num_defined_globals: fields.num_defined_globals, num_escaped_funcs: fields.num_escaped_funcs, magic: 0, @@ -276,6 +295,7 @@ impl From> for VMOffsets
<P>
{ imported_globals: 0, defined_tables: 0, defined_memories: 0, + owned_memories: 0, defined_globals: 0, defined_anyfuncs: 0, size: 0, @@ -330,7 +350,9 @@ impl From> for VMOffsets
<P>
{ size(defined_tables) = cmul(ret.num_defined_tables, ret.size_of_vmtable_definition()), size(defined_memories) - = cmul(ret.num_defined_memories, ret.size_of_vmmemory_definition()), + = cmul(ret.num_defined_memories, ret.size_of_vmmemory_pointer()), + size(owned_memories) + = cmul(ret.num_owned_memories, ret.size_of_vmmemory_definition()), align(16), size(defined_globals) = cmul(ret.num_defined_globals, ret.size_of_vmglobal_definition()), @@ -452,7 +474,7 @@ impl VMOffsets
<P>
{ /// Return the size of `VMMemoryImport`. #[inline] pub fn size_of_vmmemory_import(&self) -> u8 { - 2 * self.pointer_size() + 3 * self.pointer_size() } } @@ -477,6 +499,12 @@ impl VMOffsets
<P>
{ pub fn size_of_vmmemory_definition(&self) -> u8 { 2 * self.pointer_size() } + + /// Return the size of `*mut VMMemoryDefinition`. + #[inline] + pub fn size_of_vmmemory_pointer(&self) -> u8 { + self.pointer_size() + } } /// Offsets for `VMGlobalImport`. @@ -613,6 +641,12 @@ impl VMOffsets
<P>
{ self.defined_memories } + /// The offset of the `owned_memories` array. + #[inline] + pub fn vmctx_owned_memories_begin(&self) -> u32 { + self.owned_memories + } + /// The offset of the `globals` array. #[inline] pub fn vmctx_globals_begin(&self) -> u32 { @@ -676,11 +710,19 @@ impl VMOffsets
<P>
{ self.vmctx_tables_begin() + index.as_u32() * u32::from(self.size_of_vmtable_definition()) } - /// Return the offset to `VMMemoryDefinition` index `index`. + /// Return the offset to the `*mut VMMemoryDefinition` at index `index`. #[inline] - pub fn vmctx_vmmemory_definition(&self, index: DefinedMemoryIndex) -> u32 { + pub fn vmctx_vmmemory_pointer(&self, index: DefinedMemoryIndex) -> u32 { assert_lt!(index.as_u32(), self.num_defined_memories); - self.vmctx_memories_begin() + index.as_u32() * u32::from(self.size_of_vmmemory_definition()) + self.vmctx_memories_begin() + index.as_u32() * u32::from(self.size_of_vmmemory_pointer()) + } + + /// Return the offset to the owned `VMMemoryDefinition` at index `index`. + #[inline] + pub fn vmctx_vmmemory_definition(&self, index: OwnedMemoryIndex) -> u32 { + assert_lt!(index.as_u32(), self.num_owned_memories); + self.vmctx_owned_memories_begin() + + index.as_u32() * u32::from(self.size_of_vmmemory_definition()) } /// Return the offset to the `VMGlobalDefinition` index `index`. @@ -744,13 +786,13 @@ impl VMOffsets
<P>
{ /// Return the offset to the `base` field in `VMMemoryDefinition` index `index`. #[inline] - pub fn vmctx_vmmemory_definition_base(&self, index: DefinedMemoryIndex) -> u32 { + pub fn vmctx_vmmemory_definition_base(&self, index: OwnedMemoryIndex) -> u32 { self.vmctx_vmmemory_definition(index) + u32::from(self.vmmemory_definition_base()) } /// Return the offset to the `current_length` field in `VMMemoryDefinition` index `index`. #[inline] - pub fn vmctx_vmmemory_definition_current_length(&self, index: DefinedMemoryIndex) -> u32 { + pub fn vmctx_vmmemory_definition_current_length(&self, index: OwnedMemoryIndex) -> u32 { self.vmctx_vmmemory_definition(index) + u32::from(self.vmmemory_definition_current_length()) } diff --git a/crates/runtime/src/export.rs b/crates/runtime/src/export.rs index 5f540924b3..a96e77f12c 100644 --- a/crates/runtime/src/export.rs +++ b/crates/runtime/src/export.rs @@ -2,7 +2,7 @@ use crate::vmcontext::{ VMCallerCheckedAnyfunc, VMContext, VMGlobalDefinition, VMMemoryDefinition, VMTableDefinition, }; use std::ptr::NonNull; -use wasmtime_environ::{Global, MemoryPlan, TablePlan}; +use wasmtime_environ::{DefinedMemoryIndex, Global, MemoryPlan, TablePlan}; /// The value of an export passed from one instance to another. pub enum Export { @@ -71,6 +71,8 @@ pub struct ExportMemory { pub vmctx: *mut VMContext, /// The memory declaration, used for compatibility checking. pub memory: MemoryPlan, + /// The index at which the memory is defined within the `vmctx`. + pub index: DefinedMemoryIndex, } // See docs on send/sync for `ExportFunction` above. diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs index f7c86e0aea..9477acd4fd 100644 --- a/crates/runtime/src/externref.rs +++ b/crates/runtime/src/externref.rs @@ -1043,6 +1043,7 @@ mod tests { num_imported_globals: 0, num_defined_tables: 0, num_defined_memories: 0, + num_owned_memories: 0, num_defined_globals: 0, num_escaped_funcs: 0, }); @@ -1069,6 +1070,7 @@ mod tests { num_imported_globals: 0, num_defined_tables: 0, num_defined_memories: 0, + num_owned_memories: 0, num_defined_globals: 0, num_escaped_funcs: 0, }); @@ -1095,6 +1097,7 @@ mod tests { num_imported_globals: 0, num_defined_tables: 0, num_defined_memories: 0, + num_owned_memories: 0, num_defined_globals: 0, num_escaped_funcs: 0, }); diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 854d4e0179..1ae33e3e92 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -26,7 +26,7 @@ use std::ops::Range; use std::ptr::NonNull; use std::sync::atomic::AtomicU64; use std::sync::Arc; -use std::{mem, ptr, slice}; +use std::{mem, ptr}; use wasmtime_environ::{ packed_option::ReservedValue, DataIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, ElemIndex, EntityIndex, EntityRef, EntitySet, FuncIndex, GlobalIndex, @@ -193,13 +193,13 @@ impl Instance { self.memory(defined_index) } else { let import = self.imported_memory(index); - *unsafe { import.from.as_ref().unwrap() } + unsafe { VMMemoryDefinition::load(import.from) } } } /// Return the indexed `VMMemoryDefinition`. fn memory(&self, index: DefinedMemoryIndex) -> VMMemoryDefinition { - unsafe { *self.memory_ptr(index) } + unsafe { VMMemoryDefinition::load(self.memory_ptr(index)) } } /// Set the indexed memory to `VMMemoryDefinition`. @@ -211,7 +211,7 @@ impl Instance { /// Return the indexed `VMMemoryDefinition`. 
fn memory_ptr(&self, index: DefinedMemoryIndex) -> *mut VMMemoryDefinition { - unsafe { self.vmctx_plus_offset(self.offsets.vmctx_vmmemory_definition(index)) } + unsafe { *self.vmctx_plus_offset(self.offsets.vmctx_vmmemory_pointer(index)) } } /// Return the indexed `VMGlobalDefinition`. @@ -309,17 +309,18 @@ impl Instance { } fn get_exported_memory(&mut self, index: MemoryIndex) -> ExportMemory { - let (definition, vmctx) = if let Some(def_index) = self.module().defined_memory_index(index) - { - (self.memory_ptr(def_index), self.vmctx_ptr()) - } else { - let import = self.imported_memory(index); - (import.from, import.vmctx) - }; + let (definition, vmctx, def_index) = + if let Some(def_index) = self.module().defined_memory_index(index) { + (self.memory_ptr(def_index), self.vmctx_ptr(), def_index) + } else { + let import = self.imported_memory(index); + (import.from, import.vmctx, import.index) + }; ExportMemory { definition, vmctx, memory: self.module().memory_plans[index].clone(), + index: def_index, } } @@ -369,19 +370,6 @@ impl Instance { index } - /// Return the memory index for the given `VMMemoryDefinition`. - unsafe fn memory_index(&self, memory: &VMMemoryDefinition) -> DefinedMemoryIndex { - let index = DefinedMemoryIndex::new( - usize::try_from( - (memory as *const VMMemoryDefinition) - .offset_from(self.memory_ptr(DefinedMemoryIndex::new(0))), - ) - .unwrap(), - ); - assert_lt!(index.index(), self.memories.len()); - index - } - /// Grow memory by the specified amount of pages. /// /// Returns `None` if memory can't be grown by the specified amount @@ -398,20 +386,20 @@ impl Instance { let import = self.imported_memory(index); unsafe { let foreign_instance = (*import.vmctx).instance_mut(); - let foreign_memory_def = &*import.from; - let foreign_memory_index = foreign_instance.memory_index(foreign_memory_def); - (foreign_memory_index, foreign_instance) + (import.index, foreign_instance) } }; let store = unsafe { &mut *instance.store() }; let memory = &mut instance.memories[idx]; - let result = unsafe { memory.grow(delta, store) }; - let vmmemory = memory.vmmemory(); + let result = unsafe { memory.grow(delta, Some(store)) }; - // Update the state used by wasm code in case the base pointer and/or - // the length changed. - instance.set_memory(idx, vmmemory); + // Update the state used by a non-shared Wasm memory in case the base + // pointer and/or the length changed. + if memory.as_shared_memory().is_none() { + let vmmemory = memory.vmmemory(); + instance.set_memory(idx, vmmemory); + } result } @@ -661,14 +649,16 @@ impl Instance { let src_mem = self.get_memory(src_index); let dst_mem = self.get_memory(dst_index); - let src = self.validate_inbounds(src_mem.current_length, src, len)?; - let dst = self.validate_inbounds(dst_mem.current_length, dst, len)?; + let src = self.validate_inbounds(src_mem.current_length(), src, len)?; + let dst = self.validate_inbounds(dst_mem.current_length(), dst, len)?; // Bounds and casts are checked above, by this point we know that // everything is safe. unsafe { let dst = dst_mem.base.add(dst); let src = src_mem.base.add(src); + // FIXME audit whether this is safe in the presence of shared memory + // (https://github.com/bytecodealliance/wasmtime/issues/4203). 
ptr::copy(src, dst, len as usize); } @@ -701,12 +691,14 @@ impl Instance { len: u64, ) -> Result<(), Trap> { let memory = self.get_memory(memory_index); - let dst = self.validate_inbounds(memory.current_length, dst, len)?; + let dst = self.validate_inbounds(memory.current_length(), dst, len)?; // Bounds and casts are checked above, by this point we know that // everything is safe. unsafe { let dst = memory.base.add(dst); + // FIXME audit whether this is safe in the presence of shared memory + // (https://github.com/bytecodealliance/wasmtime/issues/4203). ptr::write_bytes(dst, val, len as usize); } @@ -751,16 +743,16 @@ impl Instance { let memory = self.get_memory(memory_index); let data = self.wasm_data(range); - let dst = self.validate_inbounds(memory.current_length, dst, len.into())?; + let dst = self.validate_inbounds(memory.current_length(), dst, len.into())?; let src = self.validate_inbounds(data.len(), src.into(), len.into())?; let len = len as usize; - let src_slice = &data[src..(src + len)]; - unsafe { + let src_start = data.as_ptr().add(src); let dst_start = memory.base.add(dst); - let dst_slice = slice::from_raw_parts_mut(dst_start, len); - dst_slice.copy_from_slice(src_slice); + // FIXME audit whether this is safe in the presence of shared memory + // (https://github.com/bytecodealliance/wasmtime/issues/4203). + ptr::copy_nonoverlapping(src_start, dst_start, len); } Ok(()) @@ -935,10 +927,27 @@ impl Instance { ptr = ptr.add(1); } - // Initialize the defined memories + // Initialize the defined memories. This fills in both the + // `defined_memories` table and the `owned_memories` table at the same + // time. Entries in `defined_memories` hold a pointer to a definition + // (all memories) whereas the `owned_memories` hold the actual + // definitions of memories owned (not shared) in the module. let mut ptr = self.vmctx_plus_offset(self.offsets.vmctx_memories_begin()); + let mut owned_ptr = self.vmctx_plus_offset(self.offsets.vmctx_owned_memories_begin()); for i in 0..module.memory_plans.len() - module.num_imported_memories { - ptr::write(ptr, self.memories[DefinedMemoryIndex::new(i)].vmmemory()); + let defined_memory_index = DefinedMemoryIndex::new(i); + let memory_index = module.memory_index(defined_memory_index); + if module.memory_plans[memory_index].memory.shared { + let def_ptr = self.memories[defined_memory_index] + .as_shared_memory() + .unwrap() + .vmmemory_ptr_mut(); + ptr::write(ptr, def_ptr); + } else { + ptr::write(owned_ptr, self.memories[defined_memory_index].vmmemory()); + ptr::write(ptr, owned_ptr); + owned_ptr = owned_ptr.add(1); + } ptr = ptr.add(1); } @@ -1104,11 +1113,6 @@ impl InstanceHandle { self.instance().host_state() } - /// Return the memory index for the given `VMMemoryDefinition` in this instance. - pub unsafe fn memory_index(&self, memory: &VMMemoryDefinition) -> DefinedMemoryIndex { - self.instance().memory_index(memory) - } - /// Get a memory defined locally within this module. 
pub fn get_defined_memory(&mut self, index: DefinedMemoryIndex) -> *mut Memory { self.instance_mut().get_defined_memory(index) diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs index 3e45687d61..bede02c42b 100644 --- a/crates/runtime/src/instance/allocator.rs +++ b/crates/runtime/src/instance/allocator.rs @@ -10,7 +10,6 @@ use std::alloc; use std::any::Any; use std::convert::TryFrom; use std::ptr; -use std::slice; use std::sync::Arc; use thiserror::Error; use wasmtime_environ::{ @@ -315,7 +314,7 @@ fn check_memory_init_bounds( .and_then(|start| start.checked_add(init.data.len())); match end { - Some(end) if end <= memory.current_length => { + Some(end) if end <= memory.current_length() => { // Initializer is in bounds } _ => { @@ -331,7 +330,7 @@ fn check_memory_init_bounds( fn initialize_memories(instance: &mut Instance, module: &Module) -> Result<(), InstantiationError> { let memory_size_in_pages = - &|memory| (instance.get_memory(memory).current_length as u64) / u64::from(WASM_PAGE_SIZE); + &|memory| (instance.get_memory(memory).current_length() as u64) / u64::from(WASM_PAGE_SIZE); // Loads the `global` value and returns it as a `u64`, but sign-extends // 32-bit globals which can be used as the base for 32-bit memories. @@ -372,10 +371,15 @@ fn initialize_memories(instance: &mut Instance, module: &Module) -> Result<(), I } } let memory = instance.get_memory(memory_index); - let dst_slice = - unsafe { slice::from_raw_parts_mut(memory.base, memory.current_length) }; - let dst = &mut dst_slice[usize::try_from(init.offset).unwrap()..][..init.data.len()]; - dst.copy_from_slice(instance.wasm_data(init.data.clone())); + + unsafe { + let src = instance.wasm_data(init.data.clone()); + let dst = memory.base.add(usize::try_from(init.offset).unwrap()); + // FIXME audit whether this is safe in the presence of shared + // memory + // (https://github.com/bytecodealliance/wasmtime/issues/4203). + ptr::copy_nonoverlapping(src.as_ptr(), dst, src.len()) + } true }, ); @@ -513,6 +517,36 @@ impl Default for OnDemandInstanceAllocator { } } +/// Allocate an instance containing a single memory. +/// +/// In order to import a [`Memory`] into a WebAssembly instance, Wasmtime +/// requires that memory to exist in its own instance. Here we bring to life +/// such a "Frankenstein" instance with the only purpose of exporting a +/// [`Memory`]. +pub unsafe fn allocate_single_memory_instance( + req: InstanceAllocationRequest, + memory: Memory, +) -> Result { + let mut memories = PrimaryMap::default(); + memories.push(memory); + let tables = PrimaryMap::default(); + let module = req.runtime_info.module(); + let offsets = VMOffsets::new(HostPtr, module); + let layout = Instance::alloc_layout(&offsets); + let instance = alloc::alloc(layout) as *mut Instance; + Instance::new_at(instance, layout.size(), offsets, req, memories, tables); + Ok(InstanceHandle { instance }) +} + +/// Internal implementation of [`InstanceHandle`] deallocation. +/// +/// See [`InstanceAllocator::deallocate()`] for more details. 
+pub unsafe fn deallocate(handle: &InstanceHandle) { + let layout = Instance::alloc_layout(&handle.instance().offsets); + ptr::drop_in_place(handle.instance); + alloc::dealloc(handle.instance.cast(), layout); +} + unsafe impl InstanceAllocator for OnDemandInstanceAllocator { unsafe fn allocate( &self, @@ -542,9 +576,7 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator { } unsafe fn deallocate(&self, handle: &InstanceHandle) { - let layout = Instance::alloc_layout(&handle.instance().offsets); - ptr::drop_in_place(handle.instance); - alloc::dealloc(handle.instance.cast(), layout); + deallocate(handle) } #[cfg(feature = "async")] diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 0da234d2fe..e8ed16c6a3 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -52,12 +52,14 @@ pub use crate::export::*; pub use crate::externref::*; pub use crate::imports::Imports; pub use crate::instance::{ - InstanceAllocationRequest, InstanceAllocator, InstanceHandle, InstantiationError, LinkError, - OnDemandInstanceAllocator, StorePtr, + allocate_single_memory_instance, InstanceAllocationRequest, InstanceAllocator, InstanceHandle, + InstantiationError, LinkError, OnDemandInstanceAllocator, StorePtr, }; #[cfg(feature = "pooling-allocator")] pub use crate::instance::{InstanceLimits, PoolingAllocationStrategy, PoolingInstanceAllocator}; -pub use crate::memory::{DefaultMemoryCreator, Memory, RuntimeLinearMemory, RuntimeMemoryCreator}; +pub use crate::memory::{ + DefaultMemoryCreator, Memory, RuntimeLinearMemory, RuntimeMemoryCreator, SharedMemory, +}; pub use crate::mmap::Mmap; pub use crate::mmap_vec::MmapVec; pub use crate::table::{Table, TableElement}; diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index d1a75286c9..6173d9f83e 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -586,7 +586,7 @@ unsafe fn validate_atomic_addr( memory: MemoryIndex, addr: usize, ) -> Result<(), Trap> { - if addr > instance.get_memory(memory).current_length { + if addr > instance.get_memory(memory).current_length() { return Err(Trap::wasm(TrapCode::HeapOutOfBounds)); } Ok(()) diff --git a/crates/runtime/src/memory.rs b/crates/runtime/src/memory.rs index 0d402dad4e..6fb822e153 100644 --- a/crates/runtime/src/memory.rs +++ b/crates/runtime/src/memory.rs @@ -11,7 +11,8 @@ use anyhow::Error; use anyhow::{bail, format_err, Result}; use more_asserts::{assert_ge, assert_le}; use std::convert::TryFrom; -use std::sync::Arc; +use std::sync::atomic::Ordering; +use std::sync::{Arc, RwLock}; use wasmtime_environ::{MemoryPlan, MemoryStyle, WASM32_MAX_PAGES, WASM64_MAX_PAGES}; const WASM_PAGE_SIZE: usize = wasmtime_environ::WASM_PAGE_SIZE as usize; @@ -60,6 +61,81 @@ pub trait RuntimeLinearMemory: Send + Sync { /// Returns `None` if the memory is unbounded. fn maximum_byte_size(&self) -> Option; + /// Grows a memory by `delta_pages`. + /// + /// This performs the necessary checks on the growth before delegating to + /// the underlying `grow_to` implementation. A default implementation of + /// this memory is provided here since this is assumed to be the same for + /// most kinds of memory; one exception is shared memory, which must perform + /// all the steps of the default implementation *plus* the required locking. + /// + /// The `store` is used only for error reporting. 
+ fn grow( + &mut self, + delta_pages: u64, + mut store: Option<&mut dyn Store>, + ) -> Result, Error> { + let old_byte_size = self.byte_size(); + + // Wasm spec: when growing by 0 pages, always return the current size. + if delta_pages == 0 { + return Ok(Some((old_byte_size, old_byte_size))); + } + + // The largest wasm-page-aligned region of memory is possible to + // represent in a `usize`. This will be impossible for the system to + // actually allocate. + let absolute_max = 0usize.wrapping_sub(WASM_PAGE_SIZE); + + // Calculate the byte size of the new allocation. Let it overflow up to + // `usize::MAX`, then clamp it down to `absolute_max`. + let new_byte_size = usize::try_from(delta_pages) + .unwrap_or(usize::MAX) + .saturating_mul(WASM_PAGE_SIZE) + .saturating_add(old_byte_size); + let new_byte_size = if new_byte_size > absolute_max { + absolute_max + } else { + new_byte_size + }; + + let maximum = self.maximum_byte_size(); + // Store limiter gets first chance to reject memory_growing. + if let Some(store) = &mut store { + if !store.memory_growing(old_byte_size, new_byte_size, maximum)? { + return Ok(None); + } + } + + // Never exceed maximum, even if limiter permitted it. + if let Some(max) = maximum { + if new_byte_size > max { + if let Some(store) = store { + // FIXME: shared memories may not have an associated store + // to report the growth failure to but the error should not + // be dropped + // (https://github.com/bytecodealliance/wasmtime/issues/4240). + store.memory_grow_failed(&format_err!("Memory maximum size exceeded")); + } + return Ok(None); + } + } + + match self.grow_to(new_byte_size) { + Ok(_) => Ok(Some((old_byte_size, new_byte_size))), + Err(e) => { + // FIXME: shared memories may not have an associated store to + // report the growth failure to but the error should not be + // dropped + // (https://github.com/bytecodealliance/wasmtime/issues/4240). + if let Some(store) = store { + store.memory_grow_failed(&e); + } + Ok(None) + } + } + } + /// Grow memory to the specified amount of bytes. /// /// Returns an error if memory can't be grown by the specified amount @@ -77,7 +153,6 @@ pub trait RuntimeLinearMemory: Send + Sync { /// For the pooling allocator, we must be able to downcast this trait to its /// underlying structure. - #[cfg(feature = "pooling-allocator")] fn as_any_mut(&mut self) -> &mut dyn std::any::Any; } @@ -114,7 +189,8 @@ pub struct MmapMemory { } impl MmapMemory { - /// Create a new linear memory instance with specified minimum and maximum number of wasm pages. + /// Create a new linear memory instance with specified minimum and maximum + /// number of wasm pages. 
pub fn new( plan: &MemoryPlan, minimum: usize, @@ -145,13 +221,14 @@ impl MmapMemory { (bound_bytes, 0) } }; + let request_bytes = pre_guard_bytes .checked_add(alloc_bytes) .and_then(|i| i.checked_add(extra_to_reserve_on_growth)) .and_then(|i| i.checked_add(offset_guard_bytes)) .ok_or_else(|| format_err!("cannot allocate {} with guard regions", minimum))?; - let mut mmap = Mmap::accessible_reserved(0, request_bytes)?; + if minimum > 0 { mmap.make_accessible(pre_guard_bytes, minimum)?; } @@ -250,7 +327,7 @@ impl RuntimeLinearMemory for MmapMemory { fn vmmemory(&mut self) -> VMMemoryDefinition { VMMemoryDefinition { base: unsafe { self.mmap.as_mut_ptr().add(self.pre_guard_size) }, - current_length: self.accessible, + current_length: self.accessible.into(), } } @@ -260,7 +337,6 @@ impl RuntimeLinearMemory for MmapMemory { self.memory_image.is_none() } - #[cfg(feature = "pooling-allocator")] fn as_any_mut(&mut self) -> &mut dyn std::any::Any { self } @@ -268,7 +344,7 @@ impl RuntimeLinearMemory for MmapMemory { /// A "static" memory where the lifetime of the backing memory is managed /// elsewhere. Currently used with the pooling allocator. -struct ExternalMemory { +struct StaticMemory { /// The memory in the host for this wasm memory. The length of this /// slice is the maximum size of the memory that can be grown to. base: &'static mut [u8], @@ -286,7 +362,7 @@ struct ExternalMemory { memory_image: Option, } -impl ExternalMemory { +impl StaticMemory { fn new( base: &'static mut [u8], initial_size: usize, @@ -324,7 +400,7 @@ impl ExternalMemory { } } -impl RuntimeLinearMemory for ExternalMemory { +impl RuntimeLinearMemory for StaticMemory { fn byte_size(&self) -> usize { self.size } @@ -362,7 +438,7 @@ impl RuntimeLinearMemory for ExternalMemory { fn vmmemory(&mut self) -> VMMemoryDefinition { VMMemoryDefinition { base: self.base.as_mut_ptr().cast(), - current_length: self.size, + current_length: self.size.into(), } } @@ -374,7 +450,150 @@ impl RuntimeLinearMemory for ExternalMemory { } } - #[cfg(feature = "pooling-allocator")] + fn as_any_mut(&mut self) -> &mut dyn std::any::Any { + self + } +} + +/// For shared memory (and only for shared memory), this lock-version restricts +/// access when growing the memory or checking its size. This is to conform with +/// the [thread proposal]: "When `IsSharedArrayBuffer(...)` is true, the return +/// value should be the result of an atomic read-modify-write of the new size to +/// the internal `length` slot." +/// +/// [thread proposal]: +/// https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#webassemblymemoryprototypegrow +#[derive(Clone)] +pub struct SharedMemory(Arc>); +impl SharedMemory { + /// Construct a new [`SharedMemory`]. + pub fn new(plan: MemoryPlan) -> Result { + let (minimum_bytes, maximum_bytes) = Memory::limit_new(&plan, None)?; + let mmap_memory = MmapMemory::new(&plan, minimum_bytes, maximum_bytes, None)?; + Ok(Self::wrap(&plan, Box::new(mmap_memory), plan.memory)) + } + + /// Wrap an existing [Memory] with the locking provided by a [SharedMemory]. + pub fn wrap( + plan: &MemoryPlan, + mut memory: Box, + ty: wasmtime_environ::Memory, + ) -> Self { + assert!(ty.shared); + assert!(matches!(plan.style, MemoryStyle::Static { .. 
})); + assert!( + memory.as_any_mut().type_id() != std::any::TypeId::of::(), + "cannot re-wrap a shared memory" + ); + let def = LongTermVMMemoryDefinition(memory.vmmemory()); + Self(Arc::new(RwLock::new(SharedMemoryInner { + memory: memory, + ty, + def, + }))) + } + + /// Return the memory type for this [`SharedMemory`]. + pub fn ty(&self) -> wasmtime_environ::Memory { + self.0.read().unwrap().ty + } + + /// Convert this shared memory into a [`Memory`]. + pub fn as_memory(self) -> Memory { + Memory(Box::new(self)) + } + + /// Return a mutable pointer to the shared memory's [VMMemoryDefinition]. + pub fn vmmemory_ptr_mut(&mut self) -> *mut VMMemoryDefinition { + &self.0.read().unwrap().def.0 as *const _ as *mut _ + } + + /// Return a pointer to the shared memory's [VMMemoryDefinition]. + pub fn vmmemory_ptr(&self) -> *const VMMemoryDefinition { + &self.0.read().unwrap().def.0 as *const _ + } +} + +struct SharedMemoryInner { + memory: Box, + ty: wasmtime_environ::Memory, + def: LongTermVMMemoryDefinition, +} + +/// Shared memory needs some representation of a `VMMemoryDefinition` for +/// JIT-generated code to access. This structure owns the base pointer and +/// length to the actual memory and we share this definition across threads by: +/// - never changing the base pointer; according to the specification, shared +/// memory must be created with a known maximum size so it can be allocated +/// once and never moved +/// - carefully changing the length, using atomic accesses in both the runtime +/// and JIT-generated code. +struct LongTermVMMemoryDefinition(VMMemoryDefinition); +unsafe impl Send for LongTermVMMemoryDefinition {} +unsafe impl Sync for LongTermVMMemoryDefinition {} + +/// Proxy all calls through the [`RwLock`]. +impl RuntimeLinearMemory for SharedMemory { + fn byte_size(&self) -> usize { + self.0.read().unwrap().memory.byte_size() + } + + fn maximum_byte_size(&self) -> Option { + self.0.read().unwrap().memory.maximum_byte_size() + } + + fn grow( + &mut self, + delta_pages: u64, + store: Option<&mut dyn Store>, + ) -> Result, Error> { + let mut inner = self.0.write().unwrap(); + let result = inner.memory.grow(delta_pages, store)?; + if let Some((_old_size_in_bytes, new_size_in_bytes)) = result { + // Store the new size to the `VMMemoryDefinition` for JIT-generated + // code (and runtime functions) to access. No other code can be + // growing this memory due to the write lock, but code in other + // threads could have access to this shared memory and we want them + // to see the most consistent version of the `current_length`; a + // weaker consistency is possible if we accept them seeing an older, + // smaller memory size (assumption: memory only grows) but presently + // we are aiming for accuracy. + // + // Note that it could be possible to access a memory address that is + // now-valid due to changes to the page flags in `grow` above but + // beyond the `memory.size` that we are about to assign to. In these + // and similar cases, discussion in the thread proposal concluded + // that: "multiple accesses in one thread racing with another + // thread's `memory.grow` that are in-bounds only after the grow + // commits may independently succeed or trap" (see + // https://github.com/WebAssembly/threads/issues/26#issuecomment-433930711). + // In other words, some non-determinism is acceptable when using + // `memory.size` on work being done by `memory.grow`. 
+ inner + .def + .0 + .current_length + .store(new_size_in_bytes, Ordering::SeqCst); + } + Ok(result) + } + + fn grow_to(&mut self, size: usize) -> Result<()> { + self.0.write().unwrap().memory.grow_to(size) + } + + fn vmmemory(&mut self) -> VMMemoryDefinition { + // `vmmemory()` is used for writing the `VMMemoryDefinition` of a memory + // into its `VMContext`; this should never be possible for a shared + // memory because the only `VMMemoryDefinition` for it should be stored + // in its own `def` field. + unreachable!() + } + + fn needs_init(&self) -> bool { + self.0.read().unwrap().memory.needs_init() + } + fn as_any_mut(&mut self) -> &mut dyn std::any::Any { self } @@ -391,13 +610,14 @@ impl Memory { store: &mut dyn Store, memory_image: Option<&Arc>, ) -> Result { - let (minimum, maximum) = Self::limit_new(plan, store)?; - Ok(Memory(creator.new_memory( - plan, - minimum, - maximum, - memory_image, - )?)) + let (minimum, maximum) = Self::limit_new(plan, Some(store))?; + let allocation = creator.new_memory(plan, minimum, maximum, memory_image)?; + let allocation = if plan.memory.shared { + Box::new(SharedMemory::wrap(plan, allocation, plan.memory)) + } else { + allocation + }; + Ok(Memory(allocation)) } /// Create a new static (immovable) memory instance for the specified plan. @@ -408,17 +628,30 @@ impl Memory { memory_image: Option, store: &mut dyn Store, ) -> Result { - let (minimum, maximum) = Self::limit_new(plan, store)?; + let (minimum, maximum) = Self::limit_new(plan, Some(store))?; let pooled_memory = - ExternalMemory::new(base, minimum, maximum, make_accessible, memory_image)?; - Ok(Memory(Box::new(pooled_memory))) + StaticMemory::new(base, minimum, maximum, make_accessible, memory_image)?; + let allocation = Box::new(pooled_memory); + let allocation: Box = if plan.memory.shared { + // FIXME: since the pooling allocator owns the memory allocation + // (which is torn down with the instance), the current shared memory + // implementation will cause problems; see + // https://github.com/bytecodealliance/wasmtime/issues/4244. + todo!("using shared memory with the pooling allocator is a work in progress"); + } else { + allocation + }; + Ok(Memory(allocation)) } /// Calls the `store`'s limiter to optionally prevent a memory from being allocated. /// /// Returns the minimum size and optional maximum size of the memory, in /// bytes. - fn limit_new(plan: &MemoryPlan, store: &mut dyn Store) -> Result<(usize, Option)> { + fn limit_new( + plan: &MemoryPlan, + store: Option<&mut dyn Store>, + ) -> Result<(usize, Option)> { // Sanity-check what should already be true from wasm module validation. let absolute_max = if plan.memory.memory64 { WASM64_MAX_PAGES @@ -473,17 +706,24 @@ impl Memory { maximum = usize::try_from(1u64 << 32).ok(); } - // Inform the store's limiter what's about to happen. This will let the limiter - // reject anything if necessary, and this also guarantees that we should - // call the limiter for all requested memories, even if our `minimum` - // calculation overflowed. This means that the `minimum` we're informing - // the limiter is lossy and may not be 100% accurate, but for now the - // expected uses of limiter means that's ok. - if !store.memory_growing(0, minimum.unwrap_or(absolute_max), maximum)? { - bail!( - "memory minimum size of {} pages exceeds memory limits", - plan.memory.minimum - ); + // Inform the store's limiter what's about to happen. 
This will let the + // limiter reject anything if necessary, and this also guarantees that + // we should call the limiter for all requested memories, even if our + // `minimum` calculation overflowed. This means that the `minimum` we're + // informing the limiter is lossy and may not be 100% accurate, but for + // now the expected uses of limiter means that's ok. + if let Some(store) = store { + // We ignore the store limits for shared memories since they are + // technically not created within a store (though, trickily, they + // may be associated with one in order to get a `vmctx`). + if !plan.memory.shared { + if !store.memory_growing(0, minimum.unwrap_or(absolute_max), maximum)? { + bail!( + "memory minimum size of {} pages exceeds memory limits", + plan.memory.minimum + ); + } + } } // At this point we need to actually handle overflows, so bail out with @@ -539,52 +779,11 @@ impl Memory { pub unsafe fn grow( &mut self, delta_pages: u64, - store: &mut dyn Store, + store: Option<&mut dyn Store>, ) -> Result, Error> { - let old_byte_size = self.byte_size(); - - // Wasm spec: when growing by 0 pages, always return the current size. - if delta_pages == 0 { - return Ok(Some(old_byte_size)); - } - - // largest wasm-page-aligned region of memory it is possible to - // represent in a usize. This will be impossible for the system to - // actually allocate. - let absolute_max = 0usize.wrapping_sub(WASM_PAGE_SIZE); - // calculate byte size of the new allocation. Let it overflow up to - // usize::MAX, then clamp it down to absolute_max. - let new_byte_size = usize::try_from(delta_pages) - .unwrap_or(usize::MAX) - .saturating_mul(WASM_PAGE_SIZE) - .saturating_add(old_byte_size); - let new_byte_size = if new_byte_size > absolute_max { - absolute_max - } else { - new_byte_size - }; - - let maximum = self.maximum_byte_size(); - // Store limiter gets first chance to reject memory_growing. - if !store.memory_growing(old_byte_size, new_byte_size, maximum)? { - return Ok(None); - } - - // Never exceed maximum, even if limiter permitted it. - if let Some(max) = maximum { - if new_byte_size > max { - store.memory_grow_failed(&format_err!("Memory maximum size exceeded")); - return Ok(None); - } - } - - match self.0.grow_to(new_byte_size) { - Ok(_) => Ok(Some(old_byte_size)), - Err(e) => { - store.memory_grow_failed(&e); - Ok(None) - } - } + self.0 + .grow(delta_pages, store) + .map(|opt| opt.map(|(old, _new)| old)) } /// Return a `VMMemoryDefinition` for exposing the memory to compiled wasm code. @@ -597,7 +796,7 @@ impl Memory { #[cfg(feature = "pooling-allocator")] pub fn is_static(&mut self) -> bool { let as_any = self.0.as_any_mut(); - as_any.downcast_ref::().is_some() + as_any.downcast_ref::().is_some() } /// Consume the memory, returning its [`MemoryImageSlot`] if any is present. @@ -606,10 +805,21 @@ impl Memory { #[cfg(feature = "pooling-allocator")] pub fn unwrap_static_image(mut self) -> Option { let as_any = self.0.as_any_mut(); - if let Some(m) = as_any.downcast_mut::() { + if let Some(m) = as_any.downcast_mut::() { std::mem::take(&mut m.memory_image) } else { None } } + + /// If the [Memory] is a [SharedMemory], unwrap it and return a clone to + /// that shared memory. 
+ pub fn as_shared_memory(&mut self) -> Option { + let as_any = self.0.as_any_mut(); + if let Some(m) = as_any.downcast_mut::() { + Some(m.clone()) + } else { + None + } + } } diff --git a/crates/runtime/src/vmcontext.rs b/crates/runtime/src/vmcontext.rs index 757520c2e0..12e4a41304 100644 --- a/crates/runtime/src/vmcontext.rs +++ b/crates/runtime/src/vmcontext.rs @@ -7,7 +7,9 @@ use std::any::Any; use std::cell::UnsafeCell; use std::marker; use std::ptr::NonNull; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::u32; +use wasmtime_environ::DefinedMemoryIndex; pub const VMCONTEXT_MAGIC: u32 = u32::from_le_bytes(*b"core"); @@ -129,6 +131,9 @@ pub struct VMMemoryImport { /// A pointer to the `VMContext` that owns the memory description. pub vmctx: *mut VMContext, + + /// The index of the memory in the containing `vmctx`. + pub index: DefinedMemoryIndex, } // Declare that this type is send/sync, it's the responsibility of users of @@ -205,14 +210,41 @@ mod test_vmglobal_import { /// The fields compiled code needs to access to utilize a WebAssembly linear /// memory defined within the instance, namely the start address and the /// size in bytes. -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] #[repr(C)] pub struct VMMemoryDefinition { /// The start address. pub base: *mut u8, /// The current logical size of this linear memory in bytes. - pub current_length: usize, + /// + /// This is atomic because shared memories must be able to grow their length + /// atomically. For relaxed access, see + /// [`VMMemoryDefinition::current_length()`]. + pub current_length: AtomicUsize, +} + +impl VMMemoryDefinition { + /// Return the current length of the [`VMMemoryDefinition`] by performing a + /// relaxed load; do not use this function for situations in which a precise + /// length is needed. Owned memories (i.e., non-shared) will always return a + /// precise result (since no concurrent modification is possible) but shared + /// memories may see an imprecise value--a `current_length` potentially + /// smaller than what some other thread observes. Since Wasm memory only + /// grows, this under-estimation may be acceptable in certain cases. + pub fn current_length(&self) -> usize { + self.current_length.load(Ordering::Relaxed) + } + + /// Return a copy of the [`VMMemoryDefinition`] using the relaxed value of + /// `current_length`; see [`VMMemoryDefinition::current_length()`]. + pub unsafe fn load(ptr: *mut Self) -> Self { + let other = &*ptr; + VMMemoryDefinition { + base: other.base, + current_length: other.current_length().into(), + } + } } #[cfg(test)] diff --git a/crates/types/src/lib.rs b/crates/types/src/lib.rs index b3bac9ec9a..a41ec5ad0c 100644 --- a/crates/types/src/lib.rs +++ b/crates/types/src/lib.rs @@ -164,6 +164,11 @@ entity_impl!(DefinedTableIndex); pub struct DefinedMemoryIndex(u32); entity_impl!(DefinedMemoryIndex); +/// Index type of a defined memory inside the WebAssembly module. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)] +pub struct OwnedMemoryIndex(u32); +entity_impl!(OwnedMemoryIndex); + /// Index type of a defined global inside the WebAssembly module. 
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)] pub struct DefinedGlobalIndex(u32); diff --git a/crates/wasmtime/src/component/func/options.rs b/crates/wasmtime/src/component/func/options.rs index 25fc61cef6..faa26cda40 100644 --- a/crates/wasmtime/src/component/func/options.rs +++ b/crates/wasmtime/src/component/func/options.rs @@ -128,7 +128,7 @@ impl Options { // is an optional configuration in canonical ABI options. unsafe { let memory = self.memory.unwrap().as_ref(); - std::slice::from_raw_parts(memory.base, memory.current_length) + std::slice::from_raw_parts(memory.base, memory.current_length()) } } @@ -139,7 +139,7 @@ impl Options { // See comments in `memory` about the unsafety unsafe { let memory = self.memory.unwrap().as_ref(); - std::slice::from_raw_parts_mut(memory.base, memory.current_length) + std::slice::from_raw_parts_mut(memory.base, memory.current_length()) } } diff --git a/crates/wasmtime/src/externals.rs b/crates/wasmtime/src/externals.rs index 6ad014af26..8942e87195 100644 --- a/crates/wasmtime/src/externals.rs +++ b/crates/wasmtime/src/externals.rs @@ -1,8 +1,8 @@ use crate::store::{StoreData, StoreOpaque, Stored}; use crate::trampoline::{generate_global_export, generate_table_export}; use crate::{ - AsContext, AsContextMut, ExternRef, ExternType, Func, GlobalType, Memory, Mutability, - TableType, Trap, Val, ValType, + AsContext, AsContextMut, Engine, ExternRef, ExternType, Func, GlobalType, Memory, Mutability, + SharedMemory, TableType, Trap, Val, ValType, }; use anyhow::{anyhow, bail, Result}; use std::mem; @@ -29,6 +29,9 @@ pub enum Extern { Table(Table), /// A WebAssembly linear memory. Memory(Memory), + /// A WebAssembly shared memory; these are handled separately from + /// [`Memory`]. + SharedMemory(SharedMemory), } impl Extern { @@ -72,6 +75,17 @@ impl Extern { } } + /// Returns the underlying `SharedMemory`, if this external is a shared + /// memory. + /// + /// Returns `None` if this is not a shared memory. + pub fn into_shared_memory(self) -> Option<SharedMemory> { + match self { + Extern::SharedMemory(memory) => Some(memory), + _ => None, + } + } + /// Returns the type associated with this `Extern`.
/// /// The `store` argument provided must own this `Extern` and is used to look @@ -85,6 +99,7 @@ impl Extern { match self { Extern::Func(ft) => ExternType::Func(ft.ty(store)), Extern::Memory(ft) => ExternType::Memory(ft.ty(store)), + Extern::SharedMemory(ft) => ExternType::Memory(ft.ty()), Extern::Table(tt) => ExternType::Table(tt.ty(store)), Extern::Global(gt) => ExternType::Global(gt.ty(store)), } } @@ -99,7 +114,11 @@ impl Extern { Extern::Func(Func::from_wasmtime_function(f, store)) } wasmtime_runtime::Export::Memory(m) => { - Extern::Memory(Memory::from_wasmtime_memory(m, store)) + if m.memory.memory.shared { + Extern::SharedMemory(SharedMemory::from_wasmtime_memory(m, store)) + } else { + Extern::Memory(Memory::from_wasmtime_memory(m, store)) + } } wasmtime_runtime::Export::Global(g) => { Extern::Global(Global::from_wasmtime_global(g, store)) } @@ -115,6 +134,7 @@ impl Extern { Extern::Func(f) => f.comes_from_same_store(store), Extern::Global(g) => store.store_data().contains(g.0), Extern::Memory(m) => m.comes_from_same_store(store), + Extern::SharedMemory(m) => Engine::same(m.engine(), store.engine()), Extern::Table(t) => store.store_data().contains(t.0), } } @@ -124,6 +144,7 @@ impl Extern { Extern::Func(_) => "function", Extern::Table(_) => "table", Extern::Memory(_) => "memory", + Extern::SharedMemory(_) => "shared memory", Extern::Global(_) => "global", } } @@ -147,6 +168,12 @@ impl From<Memory> for Extern { } } +impl From<SharedMemory> for Extern { + fn from(r: SharedMemory) -> Self { + Extern::SharedMemory(r) + } +} + impl From<Table> for Extern { fn from(r: Table) -> Self { Extern::Table(r) diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index b548f6016b..bf2cf4680e 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -2,8 +2,8 @@ use crate::linker::Definition; use crate::store::{InstanceId, StoreOpaque, Stored}; use crate::types::matching; use crate::{ - AsContextMut, Engine, Export, Extern, Func, Global, Memory, Module, StoreContextMut, Table, - Trap, TypedFunc, + AsContextMut, Engine, Export, Extern, Func, Global, Memory, Module, SharedMemory, + StoreContextMut, Table, Trap, TypedFunc, }; use anyhow::{anyhow, bail, Context, Error, Result}; use std::mem; @@ -495,6 +495,23 @@ impl Instance { self.get_export(store, name)?.into_memory() } + /// Looks up an exported [`SharedMemory`] value by name. + /// + /// Returns `None` if there was no export named `name`, or if there was but + /// it wasn't a shared memory. + /// + /// # Panics + /// + /// Panics if `store` does not own this instance. + pub fn get_shared_memory( + &self, + mut store: impl AsContextMut, + name: &str, + ) -> Option<SharedMemory> { + let mut store = store.as_context_mut(); + self.get_export(&mut store, name)?.into_shared_memory() + } + /// Looks up an exported [`Global`] value by name. /// /// Returns `None` if there was no export named `name`, or if there was but @@ -566,6 +583,9 @@ impl OwnedImports { Extern::Memory(i) => { self.memories.push(i.vmimport(store)); } + Extern::SharedMemory(i) => { + self.memories.push(i.vmimport(store)); + } } } @@ -594,6 +614,7 @@ impl OwnedImports { self.memories.push(VMMemoryImport { from: m.definition, vmctx: m.vmctx, + index: m.index, }); } } diff --git a/crates/wasmtime/src/limits.rs b/crates/wasmtime/src/limits.rs index de0e1ffaca..afe679d131 100644 --- a/crates/wasmtime/src/limits.rs +++ b/crates/wasmtime/src/limits.rs @@ -244,7 +244,7 @@ impl StoreLimitsBuilder { /// Provides limits for a [`Store`](crate::Store).
/// /// This type is created with a [`StoreLimitsBuilder`] and is typically used in -/// conjuction with [`Store::limiter`](crate::Store::limiter). +/// conjunction with [`Store::limiter`](crate::Store::limiter). /// /// This is a convenience type included to avoid needing to implement the /// [`ResourceLimiter`] trait if your use case fits in the static configuration diff --git a/crates/wasmtime/src/memory.rs b/crates/wasmtime/src/memory.rs index 596e11537f..396c9fae15 100644 --- a/crates/wasmtime/src/memory.rs +++ b/crates/wasmtime/src/memory.rs @@ -1,9 +1,11 @@ use crate::store::{StoreData, StoreOpaque, Stored}; use crate::trampoline::generate_memory_export; -use crate::{AsContext, AsContextMut, MemoryType, StoreContext, StoreContextMut}; +use crate::{AsContext, AsContextMut, Engine, MemoryType, StoreContext, StoreContextMut}; use anyhow::{bail, Result}; use std::convert::TryFrom; use std::slice; +use wasmtime_environ::MemoryPlan; +use wasmtime_runtime::{RuntimeLinearMemory, VMMemoryImport}; /// Error for out of bounds [`Memory`] access. #[derive(Debug)] @@ -227,7 +229,7 @@ impl Memory { /// # } /// ``` pub fn new(mut store: impl AsContextMut, ty: MemoryType) -> Result<Memory> { - Memory::_new(store.as_context_mut().0, ty) + Self::_new(store.as_context_mut().0, ty) } #[cfg_attr(nightlydoc, doc(cfg(feature = "async")))] @@ -252,12 +254,13 @@ impl Memory { store.0.async_support(), "cannot use `new_async` without enabling async support on the config" ); - store.on_fiber(|store| Memory::_new(store.0, ty)).await? + store.on_fiber(|store| Self::_new(store.0, ty)).await? } + /// Helper function for attaching the memory to a "frankenstein" instance fn _new(store: &mut StoreOpaque, ty: MemoryType) -> Result<Memory> { unsafe { - let export = generate_memory_export(store, &ty)?; + let export = generate_memory_export(store, &ty, None)?; Ok(Memory::from_wasmtime_memory(export, store)) } } @@ -350,8 +353,9 @@ impl Memory { pub fn data<'a, T: 'a>(&self, store: impl Into<StoreContext<'a, T>>) -> &'a [u8] { unsafe { let store = store.into(); - let definition = *store[self.0].definition; - slice::from_raw_parts(definition.base, definition.current_length) + let definition = &*store[self.0].definition; + debug_assert!(!self.ty(store).is_shared()); + slice::from_raw_parts(definition.base, definition.current_length()) } } @@ -366,8 +370,9 @@ impl Memory { pub fn data_mut<'a, T: 'a>(&self, store: impl Into<StoreContextMut<'a, T>>) -> &'a mut [u8] { unsafe { let store = store.into(); - let definition = *store[self.0].definition; - slice::from_raw_parts_mut(definition.base, definition.current_length) + let definition = &*store[self.0].definition; + debug_assert!(!self.ty(store).is_shared()); + slice::from_raw_parts_mut(definition.base, definition.current_length()) } } @@ -432,7 +437,7 @@ impl Memory { } pub(crate) fn internal_data_size(&self, store: &StoreOpaque) -> usize { - unsafe { (*store[self.0].definition).current_length } + unsafe { (*store[self.0].definition).current_length() } } /// Returns the size, in WebAssembly pages, of this wasm memory. @@ -453,7 +458,7 @@ impl Memory { /// This will attempt to add `delta` more pages of memory on to the end of /// this `Memory` instance. If successful this may relocate the memory and /// cause [`Memory::data_ptr`] to return a new value. Additionally any - /// unsafetly constructed slices into this memory may no longer be valid. + /// unsafely constructed slices into this memory may no longer be valid. /// /// On success returns the number of pages this memory previously had /// before the growth succeeded.
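The `grow` semantics documented above are visible from the embedder API as well; the following is a minimal host-side sketch (assumed example code, not part of this patch) showing that `Memory::grow` reports the size before growth:

    use anyhow::Result;
    use wasmtime::{Engine, Memory, MemoryType, Store};

    fn main() -> Result<()> {
        let engine = Engine::default();
        let mut store = Store::new(&engine, ());
        // A plain (non-shared) memory: one initial page, no maximum.
        let memory = Memory::new(&mut store, MemoryType::new(1, None))?;
        // `grow` returns the size, in pages, that the memory had *before*
        // the growth took effect.
        let previous_pages = memory.grow(&mut store, 2)?;
        assert_eq!(previous_pages, 1);
        assert_eq!(memory.size(&store), 3);
        // Any slice taken from `data()` before the call to `grow` must not
        // be reused, since growth may relocate the underlying allocation.
        Ok(())
    }
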
@@ -498,7 +503,7 @@ impl Memory { let store = store.as_context_mut().0; let mem = self.wasmtime_memory(store); unsafe { - match (*mem).grow(delta, store)? { + match (*mem).grow(delta, Some(store))? { Some(size) => { let vm = (*mem).vmmemory(); *store[self.0].definition = vm; @@ -533,12 +538,12 @@ impl Memory { ); store.on_fiber(|store| self.grow(store, delta)).await? } + fn wasmtime_memory(&self, store: &mut StoreOpaque) -> *mut wasmtime_runtime::Memory { unsafe { let export = &store[self.0]; let mut handle = wasmtime_runtime::InstanceHandle::from_vmctx(export.vmctx); - let idx = handle.memory_index(&*export.definition); - handle.get_defined_memory(idx) + handle.get_defined_memory(export.index) } } @@ -558,6 +563,7 @@ impl Memory { wasmtime_runtime::VMMemoryImport { from: export.definition, vmctx: export.vmctx, + index: export.index, } } @@ -654,6 +660,164 @@ pub unsafe trait MemoryCreator: Send + Sync { ) -> Result<Box<dyn LinearMemory>, String>; } +/// A constructor for externally-created shared memory. +/// +/// The [threads proposal] adds the concept of "shared memory" to WebAssembly. +/// This is much the same as a Wasm linear memory (i.e., [`Memory`]), but can be +/// used concurrently by multiple agents. Because these agents may execute in +/// different threads, [`SharedMemory`] must be thread-safe. +/// +/// When the threads proposal is enabled, there are multiple ways to construct +/// shared memory: +/// 1. for imported shared memory, e.g., `(import "env" "memory" (memory 1 1 +/// shared))`, the user must supply a [`SharedMemory`] with the +/// externally-created memory as an import to the instance--e.g., +/// `shared_memory.into()`. +/// 2. for private or exported shared memory, e.g., `(export "memory" +/// (memory 1 1 shared))`, Wasmtime will create the memory internally during +/// instantiation--access using `Instance::get_shared_memory()`. +/// +/// [threads proposal]: +/// https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md +/// +/// # Examples +/// +/// ``` +/// # use wasmtime::*; +/// # fn main() -> anyhow::Result<()> { +/// let mut config = Config::new(); +/// config.wasm_threads(true); +/// let engine = Engine::new(&config)?; +/// let mut store = Store::new(&engine, ()); +/// +/// let shared_memory = SharedMemory::new(&engine, MemoryType::shared(1, 2))?; +/// let module = Module::new(&engine, r#"(module (memory (import "" "") 1 2 shared))"#)?; +/// let instance = Instance::new(&mut store, &module, &[shared_memory.into()])?; +/// // ... +/// # Ok(()) +/// # } +/// ``` +#[derive(Clone)] +pub struct SharedMemory(wasmtime_runtime::SharedMemory, Engine); +impl SharedMemory { + /// Construct a [`SharedMemory`] by providing both the `minimum` and + /// `maximum` number of 64K-sized pages. This call allocates the necessary + /// pages on the system. + pub fn new(engine: &Engine, ty: MemoryType) -> Result<Self> { + if !ty.is_shared() { + bail!("shared memory must have the `shared` flag enabled on its memory type") + } + debug_assert!(ty.maximum().is_some()); + + let tunables = &engine.config().tunables; + let plan = MemoryPlan::for_memory(ty.wasmtime_memory().clone(), tunables); + let memory = wasmtime_runtime::SharedMemory::new(plan)?; + Ok(Self(memory, engine.clone())) + } + + /// Return the type of the shared memory. + pub fn ty(&self) -> MemoryType { + MemoryType::from_wasmtime_memory(&self.0.ty()) + } + + /// Returns the size, in WebAssembly pages, of this wasm memory.
+ pub fn size(&self) -> u64 { + (self.data_size() / wasmtime_environ::WASM_PAGE_SIZE as usize) as u64 + } + + /// Returns the byte length of this memory. + /// + /// The returned value will be a multiple of the wasm page size, 64k. + /// + /// For more information and examples see the documentation on the + /// [`Memory`] type. + pub fn data_size(&self) -> usize { + self.0.byte_size() + } + + /// Return access to the available portion of the shared memory. + /// + /// Because the memory is shared, it is possible that this memory is being + /// modified in other threads--in other words, the data can change at any + /// time. Users of this function must manage synchronization and locking for + /// this region of memory themselves. + /// + /// Not only can the data change, but the length of this region can change + /// as well. Other threads can call `memory.grow` operations that will + /// extend the region length but--importantly--this will not be reflected in + /// the size of the region returned by this function. + pub fn data(&self) -> *mut [u8] { + unsafe { + let definition = &*self.0.vmmemory_ptr(); + slice::from_raw_parts_mut(definition.base, definition.current_length()) + } + } + + /// Grows this WebAssembly memory by `delta` pages. + /// + /// This will attempt to add `delta` more pages of memory on to the end of + /// this `Memory` instance. If successful this may relocate the memory and + /// cause [`Memory::data_ptr`] to return a new value. Additionally any + /// unsafely constructed slices into this memory may no longer be valid. + /// + /// On success returns the number of pages this memory previously had + /// before the growth succeeded. + /// + /// # Errors + /// + /// Returns an error if memory could not be grown, for example if it exceeds + /// the maximum limits of this memory. A + /// [`ResourceLimiter`](crate::ResourceLimiter) is another example of + /// preventing a memory from growing. + pub fn grow(&mut self, delta: u64) -> Result<u64> { + match self.0.grow(delta, None)? { + Some((old_size, _new_size)) => { + // For shared memory, the `VMMemoryDefinition` is updated inside + // the locked region. + Ok(u64::try_from(old_size).unwrap() / u64::from(wasmtime_environ::WASM_PAGE_SIZE)) + } + None => bail!("failed to grow memory by `{}`", delta), + } + } + + /// Return a reference to the [`Engine`] used to configure the shared + /// memory. + pub(crate) fn engine(&self) -> &Engine { + &self.1 + } + + /// Construct a single-memory instance to provide a way to import + /// [`SharedMemory`] into other modules. + pub(crate) fn vmimport(&self, store: &mut StoreOpaque) -> wasmtime_runtime::VMMemoryImport { + let runtime_shared_memory = self.clone().0; + let export_memory = + generate_memory_export(store, &self.ty(), Some(runtime_shared_memory)).unwrap(); + VMMemoryImport { + from: export_memory.definition, + vmctx: export_memory.vmctx, + index: export_memory.index, + } + } + + /// Create a [`SharedMemory`] from an [`ExportMemory`] definition. This + /// function is available to handle the case in which a Wasm module exports + /// shared memory and the user wants host-side access to it.
+ pub(crate) unsafe fn from_wasmtime_memory( + wasmtime_export: wasmtime_runtime::ExportMemory, + store: &mut StoreOpaque, + ) -> Self { + let mut handle = wasmtime_runtime::InstanceHandle::from_vmctx(wasmtime_export.vmctx); + let memory = handle + .get_defined_memory(wasmtime_export.index) + .as_mut() + .unwrap(); + let shared_memory = memory + .as_shared_memory() + .expect("unable to convert from a shared memory"); + Self(shared_memory, store.engine().clone()) + } +} + #[cfg(test)] mod tests { use crate::*; diff --git a/crates/wasmtime/src/trampoline.rs b/crates/wasmtime/src/trampoline.rs index 357302633e..46c9fa202e 100644 --- a/crates/wasmtime/src/trampoline.rs +++ b/crates/wasmtime/src/trampoline.rs @@ -19,8 +19,8 @@ use std::any::Any; use std::sync::Arc; use wasmtime_environ::{GlobalIndex, MemoryIndex, Module, SignatureIndex, TableIndex}; use wasmtime_runtime::{ - Imports, InstanceAllocationRequest, InstanceAllocator, OnDemandInstanceAllocator, StorePtr, - VMFunctionImport, VMSharedSignatureIndex, + Imports, InstanceAllocationRequest, InstanceAllocator, OnDemandInstanceAllocator, SharedMemory, + StorePtr, VMFunctionImport, VMSharedSignatureIndex, }; fn create_handle( @@ -68,8 +68,9 @@ pub fn generate_global_export( pub fn generate_memory_export( store: &mut StoreOpaque, m: &MemoryType, + preallocation: Option<SharedMemory>, ) -> Result<wasmtime_runtime::ExportMemory> { - let instance = create_memory(store, m)?; + let instance = create_memory(store, m, preallocation)?; Ok(store .instance_mut(instance) .get_exported_memory(MemoryIndex::from_u32(0))) diff --git a/crates/wasmtime/src/trampoline/memory.rs b/crates/wasmtime/src/trampoline/memory.rs index 1a5858182f..6a136f00ae 100644 --- a/crates/wasmtime/src/trampoline/memory.rs +++ b/crates/wasmtime/src/trampoline/memory.rs @@ -1,28 +1,84 @@ use crate::memory::{LinearMemory, MemoryCreator}; +use crate::module::BareModuleInfo; use crate::store::{InstanceId, StoreOpaque}; -use crate::trampoline::create_handle; use crate::MemoryType; use anyhow::{anyhow, Result}; use std::convert::TryFrom; use std::sync::Arc; use wasmtime_environ::{EntityIndex, MemoryPlan, MemoryStyle, Module, WASM_PAGE_SIZE}; use wasmtime_runtime::{ - MemoryImage, RuntimeLinearMemory, RuntimeMemoryCreator, VMMemoryDefinition, + allocate_single_memory_instance, DefaultMemoryCreator, Imports, InstanceAllocationRequest, + InstantiationError, Memory, MemoryImage, RuntimeLinearMemory, RuntimeMemoryCreator, + SharedMemory, StorePtr, VMMemoryDefinition, }; -pub fn create_memory(store: &mut StoreOpaque, memory: &MemoryType) -> Result<InstanceId> { +/// Create a "frankenstein" instance with a single memory. +/// +/// This separate instance is necessary because Wasm objects in Wasmtime must be +/// attached to instances (rather than, e.g., the store) and some objects are +/// created outside of any instance: a host-provided memory import, or a shared +/// memory. +pub fn create_memory( + store: &mut StoreOpaque, + memory_ty: &MemoryType, + preallocation: Option<SharedMemory>, +) -> Result<InstanceId> { let mut module = Module::new(); - let memory_plan = wasmtime_environ::MemoryPlan::for_memory( - memory.wasmtime_memory().clone(), + // Create a memory plan for the memory, though it will never be used for + // constructing a memory with an allocator: instead the memories are either + // preallocated (i.e., shared memory) or allocated manually below.
+ let plan = wasmtime_environ::MemoryPlan::for_memory( + memory_ty.wasmtime_memory().clone(), &store.engine().config().tunables, ); - let memory_id = module.memory_plans.push(memory_plan); + let memory_id = module.memory_plans.push(plan.clone()); + + let memory = match &preallocation { + // If we are passing in a pre-allocated shared memory, we can clone its + // `Arc`. We know that a preallocated memory *must* be shared--it could + // be used by several instances. + Some(shared_memory) => shared_memory.clone().as_memory(), + // If we do not have a pre-allocated memory, then we create it here and + // associate it with the "frankenstein" instance, which now owns it. + None => { + let creator = &DefaultMemoryCreator; + let store = unsafe { + store + .traitobj() + .as_mut() + .expect("the store pointer cannot be null here") + }; + Memory::new_dynamic(&plan, creator, store, None) + .map_err(|err| InstantiationError::Resource(err.into()))? + } + }; + + // Since we have only associated a single memory with the "frankenstein" + // instance, it will be exported at index 0. + debug_assert_eq!(memory_id.as_u32(), 0); module .exports .insert(String::new(), EntityIndex::Memory(memory_id)); - create_handle(module, store, Box::new(()), &[], None) + // We create an instance in the on-demand allocator when creating handles + // associated with external objects. The configured instance allocator + // should only be used when creating module instances as we don't want host + // objects to count towards instance limits. + let runtime_info = &BareModuleInfo::maybe_imported_func(Arc::new(module), None).into_traitobj(); + let host_state = Box::new(()); + let imports = Imports::default(); + let request = InstanceAllocationRequest { + imports, + host_state, + store: StorePtr::new(store.traitobj()), + runtime_info, + }; + + unsafe { + let handle = allocate_single_memory_instance(request, memory)?; + let instance_id = store.add_instance(handle.clone(), true); + Ok(instance_id) + } } struct LinearMemoryProxy { @@ -45,7 +101,7 @@ impl RuntimeLinearMemory for LinearMemoryProxy { fn vmmemory(&mut self) -> VMMemoryDefinition { VMMemoryDefinition { base: self.mem.as_ptr(), - current_length: self.mem.byte_size(), + current_length: self.mem.byte_size().into(), } } @@ -53,7 +109,6 @@ impl RuntimeLinearMemory for LinearMemoryProxy { true } - #[cfg(feature = "pooling-allocator")] fn as_any_mut(&mut self) -> &mut dyn std::any::Any { self } diff --git a/crates/wasmtime/src/types.rs b/crates/wasmtime/src/types.rs index b7dc9a0a79..7286eb819a 100644 --- a/crates/wasmtime/src/types.rs +++ b/crates/wasmtime/src/types.rs @@ -373,6 +373,25 @@ impl MemoryType { } } + /// Creates a new descriptor for shared WebAssembly memory given the + /// specified limits of the memory. + /// + /// The `minimum` and `maximum` values here are specified in units of + /// WebAssembly pages, which are 64k. + /// + /// Note that shared memories are part of the threads proposal for + /// WebAssembly which is not standardized yet. + pub fn shared(minimum: u32, maximum: u32) -> MemoryType { + MemoryType { + ty: Memory { + memory64: false, + shared: true, + minimum: minimum.into(), + maximum: Some(maximum.into()), + }, + } + } + /// Returns whether this is a 64-bit memory or not. /// /// Note that 64-bit memories are part of the memory64 proposal for @@ -381,6 +400,14 @@ impl MemoryType { self.ty.memory64 } + /// Returns whether this is a shared memory or not. 
+ /// + /// Note that shared memories are part of the threads proposal for + /// WebAssembly which is not standardized yet. + pub fn is_shared(&self) -> bool { + self.ty.shared + } + /// Returns minimum number of WebAssembly pages this memory must have. /// /// Note that the return value, while a `u64`, will always fit into a `u32` diff --git a/crates/wasmtime/src/types/matching.rs b/crates/wasmtime/src/types/matching.rs index 7261a420ad..4e20473597 100644 --- a/crates/wasmtime/src/types/matching.rs +++ b/crates/wasmtime/src/types/matching.rs @@ -35,6 +35,10 @@ impl MatchCx<'_> { ) } + pub fn shared_memory(&self, expected: &Memory, actual: &crate::SharedMemory) -> Result<()> { + memory_ty(expected, actual.ty().wasmtime_memory(), Some(actual.size())) + } + pub fn func(&self, expected: SignatureIndex, actual: &crate::Func) -> Result<()> { self.vmshared_signature_index(expected, actual.sig_index(self.store.store_data())) } @@ -87,6 +91,7 @@ impl MatchCx<'_> { }, EntityType::Memory(expected) => match actual { Extern::Memory(actual) => self.memory(expected, actual), + Extern::SharedMemory(actual) => self.shared_memory(expected, actual), _ => bail!("expected memory, but found {}", actual.desc()), }, EntityType::Function(expected) => match actual { diff --git a/tests/all/main.rs b/tests/all/main.rs index 2db9e611d8..5bf8992fc3 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -30,6 +30,7 @@ mod relocs; mod stack_overflow; mod store; mod table; +mod threads; mod traps; mod wast; diff --git a/tests/all/pooling_allocator.rs b/tests/all/pooling_allocator.rs index 0509df7825..b68dd1602a 100644 --- a/tests/all/pooling_allocator.rs +++ b/tests/all/pooling_allocator.rs @@ -630,11 +630,10 @@ fn instance_too_large() -> Result<()> { let engine = Engine::new(&config)?; let expected = "\ -instance allocation for this module requires 304 bytes which exceeds the \ +instance allocation for this module requires 320 bytes which exceeds the \ configured maximum of 16 bytes; breakdown of allocation requirement: - * 78.95% - 240 bytes - instance state management - * 5.26% - 16 bytes - jit store state + * 80.00% - 256 bytes - instance state management "; match Module::new(&engine, "(module)") { Ok(_) => panic!("should have failed to compile"), @@ -648,11 +647,11 @@ configured maximum of 16 bytes; breakdown of allocation requirement: lots_of_globals.push_str(")"); let expected = "\ -instance allocation for this module requires 1904 bytes which exceeds the \ +instance allocation for this module requires 1920 bytes which exceeds the \ configured maximum of 16 bytes; breakdown of allocation requirement: - * 12.61% - 240 bytes - instance state management - * 84.03% - 1600 bytes - defined globals + * 13.33% - 256 bytes - instance state management + * 83.33% - 1600 bytes - defined globals "; match Module::new(&engine, &lots_of_globals) { Ok(_) => panic!("should have failed to compile"), diff --git a/tests/all/threads.rs b/tests/all/threads.rs new file mode 100644 index 0000000000..30e22e32e9 --- /dev/null +++ b/tests/all/threads.rs @@ -0,0 +1,269 @@ +use anyhow::Result; +use std::{ + collections::{hash_map::RandomState, HashSet}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, RwLock, + }, +}; +use wasmtime::*; + +#[test] +fn test_instantiate_shared_memory() -> Result<()> { + let wat = r#"(module (memory 1 1 shared))"#; + let mut config = Config::new(); + config.wasm_threads(true); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + let mut store = Store::new(&engine, ()); + let 
_instance = Instance::new(&mut store, &module, &[])?; + Ok(()) +} + +#[test] +fn test_import_shared_memory() -> Result<()> { + let wat = r#"(module (import "env" "memory" (memory 1 5 shared)))"#; + let mut config = Config::new(); + config.wasm_threads(true); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + let mut store = Store::new(&engine, ()); + let shared_memory = SharedMemory::new(&engine, MemoryType::shared(1, 5))?; + let _instance = Instance::new(&mut store, &module, &[shared_memory.into()])?; + Ok(()) +} + +#[test] +fn test_export_shared_memory() -> Result<()> { + let wat = r#"(module (memory (export "memory") 1 5 shared))"#; + let mut config = Config::new(); + config.wasm_threads(true); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + let mut store = Store::new(&engine, ()); + let instance = Instance::new(&mut store, &module, &[])?; + let shared_memory = instance.get_shared_memory(&mut store, "memory").unwrap(); + + assert_eq!(shared_memory.size(), 1); + assert!(shared_memory.ty().is_shared()); + assert_eq!(shared_memory.ty().maximum(), Some(5)); + + Ok(()) +} + +#[test] +fn test_sharing_of_shared_memory() -> Result<()> { + let wat = r#"(module + (import "env" "memory" (memory 1 5 shared)) + (func (export "first_word") (result i32) (i32.load (i32.const 0))) + )"#; + let mut config = Config::new(); + config.wasm_threads(true); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + let mut store = Store::new(&engine, ()); + let shared_memory = SharedMemory::new(&engine, MemoryType::shared(1, 5))?; + let instance1 = Instance::new(&mut store, &module, &[shared_memory.clone().into()])?; + let instance2 = Instance::new(&mut store, &module, &[shared_memory.clone().into()])?; + + // Modify the memory in one place. + unsafe { + (*(shared_memory.data()))[0] = 42; + } + + // Verify that the memory is the same in all shared locations. + let shared_memory_first_word = + i32::from_le_bytes(unsafe { (*shared_memory.data())[0..4].try_into()? }); + let instance1_first_word = instance1 + .get_typed_func::<(), i32, _>(&mut store, "first_word")? + .call(&mut store, ())?; + let instance2_first_word = instance2 + .get_typed_func::<(), i32, _>(&mut store, "first_word")? 
+ .call(&mut store, ())?; + assert_eq!(shared_memory_first_word, 42); + assert_eq!(instance1_first_word, 42); + assert_eq!(instance2_first_word, 42); + + Ok(()) +} + +#[test] +fn test_probe_shared_memory_size() -> Result<()> { + let wat = r#"(module + (memory (export "memory") 1 2 shared) + (func (export "size") (result i32) (memory.size)) + )"#; + let mut config = Config::new(); + config.wasm_threads(true); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + let mut store = Store::new(&engine, ()); + let instance = Instance::new(&mut store, &module, &[])?; + let size_fn = instance.get_typed_func::<(), i32, _>(&mut store, "size")?; + let mut shared_memory = instance.get_shared_memory(&mut store, "memory").unwrap(); + + assert_eq!(size_fn.call(&mut store, ())?, 1); + assert_eq!(shared_memory.size(), 1); + + shared_memory.grow(1)?; + + assert_eq!(shared_memory.size(), 2); + assert_eq!(size_fn.call(&mut store, ())?, 2); + + Ok(()) +} + +#[test] +fn test_multi_memory() -> Result<()> { + let wat = r#"(module + (import "env" "imported" (memory $imported 5 10 shared)) + (memory (export "owned") 10 20) + (memory (export "shared") 1 2 shared) + (export "imported" (memory $imported)) + )"#; + let mut config = Config::new(); + config.wasm_threads(true); + config.wasm_multi_memory(true); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + let mut store = Store::new(&engine, ()); + let incoming_shared_memory = SharedMemory::new(&engine, MemoryType::shared(5, 10))?; + let instance = Instance::new(&mut store, &module, &[incoming_shared_memory.into()])?; + let owned_memory = instance.get_memory(&mut store, "owned").unwrap(); + let shared_memory = instance.get_shared_memory(&mut store, "shared").unwrap(); + let imported_memory = instance.get_shared_memory(&mut store, "imported").unwrap(); + + assert_eq!(owned_memory.size(&store), 10); + assert_eq!(owned_memory.ty(&store).minimum(), 10); + assert_eq!(owned_memory.ty(&store).maximum(), Some(20)); + assert_eq!(owned_memory.ty(&store).is_shared(), false); + assert_eq!(shared_memory.size(), 1); + assert_eq!(shared_memory.ty().minimum(), 1); + assert_eq!(shared_memory.ty().maximum(), Some(2)); + assert_eq!(shared_memory.ty().is_shared(), true); + assert_eq!(imported_memory.size(), 5); + assert_eq!(imported_memory.ty().minimum(), 5); + assert_eq!(imported_memory.ty().maximum(), Some(10)); + assert_eq!(imported_memory.ty().is_shared(), true); + + Ok(()) +} + +#[test] +fn test_grow_memory_in_multiple_threads() -> Result<()> { + const NUM_THREADS: usize = 4; + const NUM_GROW_OPS: usize = 1000; + + let wat = r#"(module + (import "env" "memory" (memory 1 4000 shared)) + (func (export "grow") (param $delta i32) (result i32) (memory.grow (local.get $delta))) + )"#; + + let mut config = Config::new(); + config.wasm_threads(true); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + let shared_memory = SharedMemory::new(&engine, MemoryType::shared(1, NUM_GROW_OPS as u32))?; + let mut threads = vec![]; + let observed_sizes = Arc::new(RwLock::new(vec![])); + + // Spawn several threads using a single shared memory and grow the memory + // concurrently on all threads. 
+ for _ in 0..NUM_THREADS { + let engine = engine.clone(); + let module = module.clone(); + let observed_sizes = observed_sizes.clone(); + let shared_memory = shared_memory.clone(); + let thread = std::thread::spawn(move || { + let mut store = Store::new(&engine, ()); + let instance = Instance::new(&mut store, &module, &[shared_memory.into()]).unwrap(); + let grow_fn = instance + .get_typed_func::<i32, i32, _>(&mut store, "grow") + .unwrap(); + let mut thread_local_observed_sizes: Vec<_> = (0..NUM_GROW_OPS / NUM_THREADS) + .map(|_| grow_fn.call(&mut store, 1).unwrap() as u32) + .collect(); + println!( + "Returned memory sizes for {:?}: {:?}", + std::thread::current().id(), + thread_local_observed_sizes + ); + assert!(is_sorted(thread_local_observed_sizes.as_slice())); + observed_sizes + .write() + .unwrap() + .append(&mut thread_local_observed_sizes); + }); + threads.push(thread); + } + + // Wait for all threads to finish. + for t in threads { + t.join().unwrap() + } + + // Ensure the returned "old memory sizes" are all unique--i.e., we have not + // observed the same growth twice. + let unique_observed_sizes: HashSet<u32, RandomState> = + HashSet::from_iter(observed_sizes.read().unwrap().iter().cloned()); + assert_eq!( + observed_sizes.read().unwrap().len(), + unique_observed_sizes.len() + ); + + Ok(()) +} + +fn is_sorted(data: &[u32]) -> bool { + data.windows(2).all(|d| d[0] <= d[1]) +} + +#[test] +fn test_memory_size_accessibility() -> Result<()> { + const NUM_GROW_OPS: usize = 1000; + let wat = r#"(module + (import "env" "memory" (memory $memory 1 1000 shared)) + (func (export "probe_last_available") (result i32) + (local $last_address i32) + (local.set $last_address (i32.sub (i32.mul (memory.size) (i32.const 0x10000)) (i32.const 4))) + (i32.load $memory (local.get $last_address)) + ) + )"#; + + let mut config = Config::new(); + config.wasm_threads(true); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + let shared_memory = SharedMemory::new(&engine, MemoryType::shared(1, NUM_GROW_OPS as u32))?; + let done = Arc::new(AtomicBool::new(false)); + + let mut grow_memory = shared_memory.clone(); + let grow_thread = std::thread::spawn(move || { + for i in 0..NUM_GROW_OPS { + if grow_memory.grow(1).is_err() { + println!("stopping at grow operation #{}", i); + break; + } + } + }); + + let probe_memory = shared_memory.clone(); + let probe_done = done.clone(); + let probe_thread = std::thread::spawn(move || { + let mut store = Store::new(&engine, ()); + let instance = Instance::new(&mut store, &module, &[probe_memory.into()]).unwrap(); + let probe_fn = instance + .get_typed_func::<(), i32, _>(&mut store, "probe_last_available") + .unwrap(); + while !probe_done.load(Ordering::SeqCst) { + let value = probe_fn.call(&mut store, ()).unwrap() as u32; + assert_eq!(value, 0); + } + }); + + grow_thread.join().unwrap(); + done.store(true, Ordering::SeqCst); + probe_thread.join().unwrap(); + + Ok(()) +}
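As a closing illustration of the embedder API added by this patch, here is a minimal multi-threaded sketch in the style of the tests above. It is assumed example code, not part of the patch; the module body and its exported `store` function are illustrative only.

    use anyhow::Result;
    use wasmtime::*;

    fn main() -> Result<()> {
        let mut config = Config::new();
        config.wasm_threads(true);
        let engine = Engine::new(&config)?;
        let module = Module::new(
            &engine,
            r#"(module
                (import "env" "memory" (memory 1 1 shared))
                (func (export "store") (param i32 i32)
                    (i32.store (local.get 0) (local.get 1))))"#,
        )?;
        let shared_memory = SharedMemory::new(&engine, MemoryType::shared(1, 1))?;

        let handles: Vec<_> = (0..2)
            .map(|i| {
                let engine = engine.clone();
                let module = module.clone();
                let shared_memory = shared_memory.clone();
                std::thread::spawn(move || -> Result<()> {
                    // Each thread needs its own store; the shared memory is
                    // the only state common to the two instances.
                    let mut store = Store::new(&engine, ());
                    let instance =
                        Instance::new(&mut store, &module, &[shared_memory.into()])?;
                    let store_fn =
                        instance.get_typed_func::<(i32, i32), (), _>(&mut store, "store")?;
                    store_fn.call(&mut store, (i * 4, 42))?;
                    Ok(())
                })
            })
            .collect();
        for handle in handles {
            handle.join().unwrap()?;
        }

        // All writers have been joined, so plain reads of the shared data
        // cannot race here; in general, concurrent access to `data()` must be
        // synchronized by the embedder.
        let first_byte = unsafe { (*shared_memory.data())[0] };
        assert_eq!(first_byte, 42);
        Ok(())
    }
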