diff --git a/Cargo.lock b/Cargo.lock
index b05d7a3249..44cbe0a771 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3556,6 +3556,7 @@ dependencies = [
  "lazy_static",
  "libc",
  "log",
+ "object",
  "paste",
  "psm",
  "rayon",
@@ -3654,6 +3655,7 @@ dependencies = [
  "lazy_static",
  "libc",
  "log",
+ "memchr",
  "more-asserts",
  "num_cpus",
  "object",
diff --git a/Cargo.toml b/Cargo.toml
index 1d128937eb..2aaf5485f3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -58,6 +58,7 @@ wast = "37.0.0"
 criterion = "0.3.4"
 num_cpus = "1.13.0"
 winapi = { version = "0.3.9", features = ['memoryapi'] }
+memchr = "2.4"
 
 [build-dependencies]
 anyhow = "1.0.19"
diff --git a/crates/cache/src/lib.rs b/crates/cache/src/lib.rs
index 874e481934..64f7d8e262 100644
--- a/crates/cache/src/lib.rs
+++ b/crates/cache/src/lib.rs
@@ -42,13 +42,40 @@ impl<'config> ModuleCacheEntry<'config> {
         Self(Some(inner))
     }
 
-    /// Gets cached data if state matches, otherwise calls the `compute`.
-    // NOTE: This takes a function pointer instead of a closure so that it doesn't accidentally
-    // close over something not accounted in the cache.
-    pub fn get_data<T, U, E>(&self, state: T, compute: fn(T) -> Result<U, E>) -> Result<U, E>
+    /// Gets cached data if state matches, otherwise calls `compute`.
+    ///
+    /// Data is automatically serialized/deserialized with `bincode`.
+    pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E>
     where
         T: Hash,
         U: Serialize + for<'a> Deserialize<'a>,
+    {
+        self.get_data_raw(
+            &state,
+            compute,
+            |_state, data| bincode::serialize(data).ok(),
+            |_state, data| bincode::deserialize(&data).ok(),
+        )
+    }
+
+    /// Gets cached data if state matches, otherwise calls `compute`.
+    ///
+    /// If the cache is disabled or no cached data is found then `compute` is
+    /// called to calculate the data. If the data was found in cache it is
+    /// passed to `deserialize`, which if successful will be the returned value.
+    /// When computed the `serialize` function is used to generate the bytes
+    /// from the returned value.
+    pub fn get_data_raw<T, U, E>(
+        &self,
+        state: &T,
+        // NOTE: These are function pointers instead of closures so that they
+        // don't accidentally close over something not accounted in the cache.
+        compute: fn(&T) -> Result<U, E>,
+        serialize: fn(&T, &U) -> Option<Vec<u8>>,
+        deserialize: fn(&T, Vec<u8>) -> Option<U>,
+    ) -> Result<U, E>
+    where
+        T: Hash,
     {
         let inner = match &self.0 {
             Some(inner) => inner,
@@ -62,14 +89,18 @@ impl<'config> ModuleCacheEntry<'config> {
         let hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD);
 
         if let Some(cached_val) = inner.get_data(&hash) {
-            let mod_cache_path = inner.root_path.join(&hash);
-            inner.cache_config.on_cache_get_async(&mod_cache_path); // call on success
-            return Ok(cached_val);
+            if let Some(val) = deserialize(state, cached_val) {
+                let mod_cache_path = inner.root_path.join(&hash);
+                inner.cache_config.on_cache_get_async(&mod_cache_path); // call on success
+                return Ok(val);
+            }
         }
         let val_to_cache = compute(state)?;
-        if inner.update_data(&hash, &val_to_cache).is_some() {
-            let mod_cache_path = inner.root_path.join(&hash);
-            inner.cache_config.on_cache_update_async(&mod_cache_path); // call on success
+        if let Some(bytes) = serialize(state, &val_to_cache) {
+            if inner.update_data(&hash, &bytes).is_some() {
+                let mod_cache_path = inner.root_path.join(&hash);
+                inner.cache_config.on_cache_update_async(&mod_cache_path); // call on success
+            }
         }
         Ok(val_to_cache)
     }
@@ -118,27 +149,19 @@ impl<'config> ModuleCacheEntryInner<'config> {
         }
     }
 
-    fn get_data<T>(&self, hash: &str) -> Option<T>
-    where
-        T: for<'a> Deserialize<'a>,
-    {
+    fn get_data(&self, hash: &str) -> Option<Vec<u8>> {
         let mod_cache_path = self.root_path.join(hash);
         trace!("get_data() for path: {}", mod_cache_path.display());
         let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
         let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
             .map_err(|err| warn!("Failed to decompress cached code: {}", err))
             .ok()?;
-        bincode::deserialize(&cache_bytes[..])
-            .map_err(|err| warn!("Failed to deserialize cached code: {}", err))
-            .ok()
+        Some(cache_bytes)
     }
 
-    fn update_data<T: Serialize>(&self, hash: &str, data: &T) -> Option<()> {
+    fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> {
         let mod_cache_path = self.root_path.join(hash);
         trace!("update_data() for path: {}", mod_cache_path.display());
-        let serialized_data = bincode::serialize(&data)
-            .map_err(|err| warn!("Failed to serialize cached code: {}", err))
-            .ok()?;
         let compressed_data = zstd::encode_all(
             &serialized_data[..],
             self.cache_config.baseline_compression_level(),
diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs
index 1757935179..5968a32d95 100644
--- a/crates/jit/src/instantiate.rs
+++ b/crates/jit/src/instantiate.rs
@@ -6,7 +6,7 @@
 use crate::code_memory::CodeMemory;
 use crate::debug::create_gdbjit_image;
 use crate::link::link_module;
-use crate::ProfilingAgent;
+use crate::{MmapVec, ProfilingAgent};
 use anyhow::{anyhow, Context, Result};
 use object::read::File;
 use object::write::{Object, StandardSegment};
@@ -68,15 +68,6 @@ pub enum SetupError {
     DebugInfo(#[from] anyhow::Error),
 }
 
-/// Final result of compilation which supports serialization to disk.
-#[derive(Serialize, Deserialize)]
-pub struct CompilationArtifacts {
-    // NB: this structure is in a transitionary phase and will soon go away. At
-    // this time it only contains the ELF image created by compilation, and in
-    // the near future even this will be removed.
-    obj: Box<[u8]>,
-}
-
 /// Secondary in-memory results of compilation.
 ///
 /// This opaque structure can be optionally passed back to
@@ -113,125 +104,120 @@ struct Metadata {
     has_wasm_debuginfo: bool,
 }
 
-impl CompilationArtifacts {
-    /// Finishes compilation of the `translation` specified, producing the final
-    /// compilation artifacts and auxiliary information.
-    ///
-    /// This function will consume the final results of compiling a wasm module
-    /// and finish the ELF image in-progress as part of `obj` by appending any
-    /// compiler-agnostic sections.
-    ///
-    /// The auxiliary `CompiledModuleInfo` structure returned here has also been
-    /// serialized into `CompilationArtifacts`, but if the caller will quickly
-    /// turn-around and invoke `CompiledModule::from_artifacts` after this then
-    /// the information can be passed to that method to avoid extra
-    /// deserialization. This is done to avoid a serialize-then-deserialize for
-    /// API calls like `Module::new` where the compiled module is immediately
-    /// going to be used.
-    pub fn new(
-        translation: ModuleTranslation<'_>,
-        mut obj: Object,
-        funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
-        tunables: &Tunables,
-    ) -> Result<(CompilationArtifacts, CompiledModuleInfo)> {
-        let ModuleTranslation {
-            mut module,
-            debuginfo,
+/// Finishes compilation of the `translation` specified, producing the final
+/// compilation artifact and auxiliary information.
+///
+/// This function will consume the final results of compiling a wasm module
+/// and finish the ELF image in-progress as part of `obj` by appending any
+/// compiler-agnostic sections.
+///
+/// The auxiliary `CompiledModuleInfo` structure returned here has also been
+/// serialized into the object returned, but if the caller will quickly
+/// turn-around and invoke `CompiledModule::from_artifacts` after this then the
+/// information can be passed to that method to avoid extra deserialization.
+/// This is done to avoid a serialize-then-deserialize for API calls like
+/// `Module::new` where the compiled module is immediately going to be used.
+///
+/// The `MmapVec` returned here contains the compiled image and resides in
+/// mmap'd memory for easily switching permissions to executable afterwards.
+pub fn finish_compile(
+    translation: ModuleTranslation<'_>,
+    mut obj: Object,
+    funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
+    tunables: &Tunables,
+) -> Result<(MmapVec, CompiledModuleInfo)> {
+    let ModuleTranslation {
+        mut module,
+        debuginfo,
+        has_unparsed_debuginfo,
+        data,
+        passive_data,
+        ..
+    } = translation;
+
+    // Place all data from the wasm module into a section which will be the
+    // source of the data later at runtime.
+    let data_id = obj.add_section(
+        obj.segment_name(StandardSegment::Data).to_vec(),
+        ELF_WASM_DATA.as_bytes().to_vec(),
+        SectionKind::ReadOnlyData,
+    );
+    let mut total_data_len = 0;
+    for data in data.iter() {
+        obj.append_section_data(data_id, data, 1);
+        total_data_len += data.len();
+    }
+    for data in passive_data.iter() {
+        obj.append_section_data(data_id, data, 1);
+    }
+
+    // Update passive data offsets since they're all located after the other
+    // data in the module.
+    for (_, range) in module.passive_data_map.iter_mut() {
+        range.start = range.start.checked_add(total_data_len as u32).unwrap();
+        range.end = range.end.checked_add(total_data_len as u32).unwrap();
+    }
+
+    // Insert the raw wasm-based debuginfo into the output, if
+    // requested. Note that this is distinct from the native debuginfo
+    // possibly generated by the native compiler, hence these sections
+    // getting wasm-specific names.
+    if tunables.parse_wasm_debuginfo {
+        push_debug(&mut obj, &debuginfo.dwarf.debug_abbrev);
+        push_debug(&mut obj, &debuginfo.dwarf.debug_addr);
+        push_debug(&mut obj, &debuginfo.dwarf.debug_aranges);
+        push_debug(&mut obj, &debuginfo.dwarf.debug_info);
+        push_debug(&mut obj, &debuginfo.dwarf.debug_line);
+        push_debug(&mut obj, &debuginfo.dwarf.debug_line_str);
+        push_debug(&mut obj, &debuginfo.dwarf.debug_str);
+        push_debug(&mut obj, &debuginfo.dwarf.debug_str_offsets);
+        push_debug(&mut obj, &debuginfo.debug_ranges);
+        push_debug(&mut obj, &debuginfo.debug_rnglists);
+    }
+
+    // Encode a `CompiledModuleInfo` structure into the `ELF_WASMTIME_INFO`
+    // section of this image. This is not necessary when the returned module
+    // is never serialized to disk, which is also why we return a copy of
+    // the `CompiledModuleInfo` structure to the caller in case they don't
+    // want to deserialize this value immediately afterwards from the
+    // section. Otherwise, though, this is necessary to reify a `Module` on
+    // the other side from disk-serialized artifacts in
+    // `Module::deserialize` (a Wasmtime API).
+    let info_id = obj.add_section(
+        obj.segment_name(StandardSegment::Data).to_vec(),
+        ELF_WASMTIME_INFO.as_bytes().to_vec(),
+        SectionKind::ReadOnlyData,
+    );
+    let mut bytes = Vec::new();
+    let info = CompiledModuleInfo {
+        module,
+        funcs,
+        meta: Metadata {
+            native_debug_info_present: tunables.generate_native_debuginfo,
             has_unparsed_debuginfo,
-            data,
-            passive_data,
-            ..
-        } = translation;
+            code_section_offset: debuginfo.wasm_file.code_section_offset,
+            has_wasm_debuginfo: tunables.parse_wasm_debuginfo,
+        },
+    };
+    bincode::serialize_into(&mut bytes, &info)?;
+    obj.append_section_data(info_id, &bytes, 1);
 
-        // Place all data from the wasm module into a section which will the
-        // source of the data later at runtime.
-        let data_id = obj.add_section(
-            obj.segment_name(StandardSegment::Data).to_vec(),
-            ELF_WASM_DATA.as_bytes().to_vec(),
-            SectionKind::ReadOnlyData,
+    return Ok((MmapVec::from_obj(obj)?, info));
+
+    fn push_debug<'a, T>(obj: &mut Object, section: &T)
+    where
+        T: gimli::Section<gimli::EndianSlice<'a, gimli::LittleEndian>>,
+    {
+        let data = section.reader().slice();
+        if data.is_empty() {
+            return;
+        }
+        let section_id = obj.add_section(
+            obj.segment_name(StandardSegment::Debug).to_vec(),
+            wasm_section_name(T::id()).as_bytes().to_vec(),
+            SectionKind::Debug,
         );
-        let mut total_data_len = 0;
-        for data in data.iter() {
-            obj.append_section_data(data_id, data, 1);
-            total_data_len += data.len();
-        }
-        for data in passive_data.iter() {
-            obj.append_section_data(data_id, data, 1);
-        }
-
-        // Update passive data offsets since they're all located after the other
-        // data in the module.
-        for (_, range) in module.passive_data_map.iter_mut() {
-            range.start = range.start.checked_add(total_data_len as u32).unwrap();
-            range.end = range.end.checked_add(total_data_len as u32).unwrap();
-        }
-
-        // Insert the wasm raw wasm-based debuginfo into the output, if
-        // requested. Note that this is distinct from the native debuginfo
-        // possibly generated by the native compiler, hence these sections
-        // getting wasm-specific names.
-        if tunables.parse_wasm_debuginfo {
-            push_debug(&mut obj, &debuginfo.dwarf.debug_abbrev);
-            push_debug(&mut obj, &debuginfo.dwarf.debug_addr);
-            push_debug(&mut obj, &debuginfo.dwarf.debug_aranges);
-            push_debug(&mut obj, &debuginfo.dwarf.debug_info);
-            push_debug(&mut obj, &debuginfo.dwarf.debug_line);
-            push_debug(&mut obj, &debuginfo.dwarf.debug_line_str);
-            push_debug(&mut obj, &debuginfo.dwarf.debug_str);
-            push_debug(&mut obj, &debuginfo.dwarf.debug_str_offsets);
-            push_debug(&mut obj, &debuginfo.debug_ranges);
-            push_debug(&mut obj, &debuginfo.debug_rnglists);
-        }
-
-        // Encode a `CompiledModuleInfo` structure into the `ELF_WASMTIME_INFO`
-        // section of this image. This is not necessary when the returned module
-        // is never serialized to disk, which is also why we return a copy of
-        // the `CompiledModuleInfo` structure to the caller in case they don't
-        // want to deserialize this value immediately afterwards from the
-        // section. Otherwise, though, this is necessary to reify a `Module` on
-        // the other side from disk-serialized artifacts in
-        // `Module::deserialize` (a Wasmtime API).
-        let info_id = obj.add_section(
-            obj.segment_name(StandardSegment::Data).to_vec(),
-            ELF_WASMTIME_INFO.as_bytes().to_vec(),
-            SectionKind::ReadOnlyData,
-        );
-        let mut bytes = Vec::new();
-        let info = CompiledModuleInfo {
-            module,
-            funcs,
-            meta: Metadata {
-                native_debug_info_present: tunables.generate_native_debuginfo,
-                has_unparsed_debuginfo,
-                code_section_offset: debuginfo.wasm_file.code_section_offset,
-                has_wasm_debuginfo: tunables.parse_wasm_debuginfo,
-            },
-        };
-        bincode::serialize_into(&mut bytes, &info)?;
-        obj.append_section_data(info_id, &bytes, 1);
-
-        return Ok((
-            CompilationArtifacts {
-                obj: obj.write()?.into(),
-            },
-            info,
-        ));
-
-        fn push_debug<'a, T>(obj: &mut Object, section: &T)
-        where
-            T: gimli::Section<gimli::EndianSlice<'a, gimli::LittleEndian>>,
-        {
-            let data = section.reader().slice();
-            if data.is_empty() {
-                return;
-            }
-            let section_id = obj.add_section(
-                obj.segment_name(StandardSegment::Debug).to_vec(),
-                wasm_section_name(T::id()).as_bytes().to_vec(),
-                SectionKind::Debug,
-            );
-            obj.append_section_data(section_id, data, 1);
-        }
+        obj.append_section_data(section_id, data, 1);
     }
 }
 
@@ -270,7 +256,7 @@ pub struct CompiledModule {
     wasm_data: Range<usize>,
     address_map_data: Range<usize>,
     trap_data: Range<usize>,
-    artifacts: CompilationArtifacts,
+    mmap: MmapVec,
     module: Arc<Module>,
     funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
     meta: Metadata,
@@ -280,7 +266,12 @@ pub struct CompiledModule {
 }
 
 impl CompiledModule {
-    /// Creates `CompiledModule` directly from `CompilationArtifacts`.
+    /// Creates `CompiledModule` directly from a precompiled artifact.
+    ///
+    /// The `mmap` argument is expected to be the result of a previous call to
+    /// `finish_compile` above. This is an ELF image, at this time, which
+    /// contains all necessary information to create a `CompiledModule` from a
+    /// compilation.
     ///
     /// This method also takes `info`, an optionally-provided deserialization of
     /// the artifacts' compilation metadata section. If this information is not
@@ -292,11 +283,11 @@ impl CompiledModule {
     /// The `profiler` argument here is used to inform JIT profiling runtimes
     /// about new code that is loaded.
     pub fn from_artifacts(
-        artifacts: CompilationArtifacts,
+        mmap: MmapVec,
         info: Option<CompiledModuleInfo>,
         profiler: &dyn ProfilingAgent,
     ) -> Result<Arc<Self>> {
-        let obj = File::parse(&artifacts.obj[..])
+        let obj = File::parse(&mmap[..])
             .with_context(|| "failed to parse internal ELF compilation artifact")?;
 
         let section = |name: &str| {
@@ -314,9 +305,9 @@ impl CompiledModule {
         };
         let module = Arc::new(info.module);
         let funcs = info.funcs;
-        let wasm_data = subslice_range(section(ELF_WASM_DATA)?, &artifacts.obj);
-        let address_map_data = subslice_range(section(ELF_WASMTIME_ADDRMAP)?, &artifacts.obj);
-        let trap_data = subslice_range(section(ELF_WASMTIME_TRAPS)?, &artifacts.obj);
+        let wasm_data = subslice_range(section(ELF_WASM_DATA)?, &mmap);
+        let address_map_data = subslice_range(section(ELF_WASMTIME_ADDRMAP)?, &mmap);
+        let trap_data = subslice_range(section(ELF_WASMTIME_TRAPS)?, &mmap);
 
         // Allocate all of the compiled functions into executable memory,
         // copying over their contents.
@@ -336,7 +327,7 @@ impl CompiledModule {
             meta: info.meta,
             funcs,
             module,
-            artifacts,
+            mmap,
             wasm_data,
             address_map_data,
             trap_data,
@@ -357,7 +348,7 @@ impl CompiledModule {
         // Register GDB JIT images; initialize profiler and load the wasm module.
         let dbg_jit_registration = if self.meta.native_debug_info_present {
             let bytes = create_gdbjit_image(
-                self.artifacts.obj.to_vec(),
+                self.mmap.to_vec(),
                 (
                     self.code.range.0 as *const u8,
                     self.code.range.1 - self.code.range.0,
@@ -376,9 +367,10 @@ impl CompiledModule {
         Ok(())
     }
 
-    /// Extracts `CompilationArtifacts` from the compiled module.
-    pub fn compilation_artifacts(&self) -> &CompilationArtifacts {
-        &self.artifacts
+    /// Returns the underlying memory which contains the compiled module's
+    /// image.
+    pub fn mmap(&self) -> &MmapVec {
+        &self.mmap
     }
 
     /// Returns the concatenated list of all data associated with this wasm
@@ -387,20 +379,20 @@ impl CompiledModule {
     /// This is used for initialization of memories and all data ranges stored
     /// in a `Module` are relative to the slice returned here.
     pub fn wasm_data(&self) -> &[u8] {
-        &self.artifacts.obj[self.wasm_data.clone()]
+        &self.mmap[self.wasm_data.clone()]
     }
 
     /// Returns the encoded address map section used to pass to
     /// `wasmtime_environ::lookup_file_pos`.
     pub fn address_map_data(&self) -> &[u8] {
-        &self.artifacts.obj[self.address_map_data.clone()]
+        &self.mmap[self.address_map_data.clone()]
     }
 
     /// Returns the encoded trap information for this compiled image.
     ///
     /// For more information see `wasmtime_environ::trap_encoding`.
     pub fn trap_data(&self) -> &[u8] {
-        &self.artifacts.obj[self.trap_data.clone()]
+        &self.mmap[self.trap_data.clone()]
     }
 
     /// Return a reference-counting pointer to a module.
@@ -500,7 +492,7 @@ impl CompiledModule {
         if !self.meta.has_wasm_debuginfo {
             return Ok(None);
         }
-        let obj = File::parse(&self.artifacts.obj[..])
+        let obj = File::parse(&self.mmap[..])
             .context("failed to parse internal ELF file representation")?;
         let dwarf = gimli::Dwarf::load(|id| -> Result<_> {
             let data = obj
@@ -603,7 +595,7 @@ fn build_code_memory(
 ///
 /// This method requires that `inner` is a sub-slice of `outer`, and if that
 /// isn't true then this method will panic.
-fn subslice_range(inner: &[u8], outer: &[u8]) -> Range<usize> {
+pub fn subslice_range(inner: &[u8], outer: &[u8]) -> Range<usize> {
     if inner.len() == 0 {
         return 0..0;
     }
diff --git a/crates/jit/src/lib.rs b/crates/jit/src/lib.rs
index 3de944d48a..1d7a284c52 100644
--- a/crates/jit/src/lib.rs
+++ b/crates/jit/src/lib.rs
@@ -24,15 +24,17 @@ mod code_memory;
 mod debug;
 mod instantiate;
 mod link;
+mod mmap_vec;
 mod profiling;
 mod unwind;
 
 pub use crate::code_memory::CodeMemory;
 pub use crate::instantiate::{
-    CompilationArtifacts, CompiledModule, CompiledModuleInfo, ModuleCode, SetupError,
+    finish_compile, subslice_range, CompiledModule, CompiledModuleInfo, ModuleCode, SetupError,
     SymbolizeContext, TypeTables,
 };
 pub use crate::link::link_module;
+pub use crate::mmap_vec::MmapVec;
 pub use profiling::*;
 
 /// Version number of this crate.
diff --git a/crates/jit/src/mmap_vec.rs b/crates/jit/src/mmap_vec.rs
new file mode 100644
index 0000000000..ee49b27119
--- /dev/null
+++ b/crates/jit/src/mmap_vec.rs
@@ -0,0 +1,229 @@
+use anyhow::{Error, Result};
+use object::write::{Object, WritableBuffer};
+use std::ops::{Deref, DerefMut, Range, RangeTo};
+use std::sync::Arc;
+use wasmtime_runtime::Mmap;
+
+/// A type akin to `Vec<u8>`, but backed by `mmap` and able to be split.
+///
+/// This type is a non-growable owned list of bytes. It can be segmented into
+/// disjoint separately owned views akin to the `split_at` method on slices in
+/// Rust. An `MmapVec` is backed by an OS-level memory allocation and is not
+/// suitable for lots of small allocations (since it works at the page
+/// granularity).
+///
+/// An `MmapVec` is an owned value which means that owners have the ability to
+/// get exclusive access to the underlying bytes, enabling mutation.
+pub struct MmapVec {
+    mmap: Arc<Mmap>,
+    range: Range<usize>,
+}
+
+impl MmapVec {
+    /// Consumes an existing `mmap` and wraps it up into an `MmapVec`.
+    ///
+    /// The returned `MmapVec` will have the `size` specified, which can be
+    /// smaller than the region mapped by the `Mmap`. The returned `MmapVec`
+    /// will only have at most `size` bytes accessible.
+    pub fn new(mmap: Mmap, size: usize) -> MmapVec {
+        assert!(size <= mmap.len());
+        MmapVec {
+            mmap: Arc::new(mmap),
+            range: 0..size,
+        }
+    }
+
+    /// Creates a new zero-initialized `MmapVec` with the given `size`.
+    ///
+    /// This function will return a new `MmapVec` suitably sized to hold `size`
+    /// bytes. All bytes will be initialized to zero since this is a fresh OS
+    /// page allocation.
+    pub fn with_capacity(size: usize) -> Result<MmapVec> {
+        Ok(MmapVec::new(Mmap::with_at_least(size)?, size))
+    }
+
+    /// Creates a new `MmapVec` from the contents of an existing `slice`.
+    ///
+    /// A new `MmapVec` is allocated to hold the contents of `slice` and then
+    /// `slice` is copied into the new mmap. It's recommended to avoid this
+    /// method if possible to avoid the need to copy data around.
+    pub fn from_slice(slice: &[u8]) -> Result<MmapVec> {
+        let mut result = MmapVec::with_capacity(slice.len())?;
+        result.copy_from_slice(slice);
+        Ok(result)
+    }
+
+    /// Creates a new `MmapVec` from serializing the specified `obj`.
+    ///
+    /// The returned `MmapVec` will contain the serialized version of `obj` and
+    /// is sized appropriately to the exact size of the object serialized.
+    pub fn from_obj(obj: Object) -> Result<MmapVec> {
+        let mut result = ObjectMmap::default();
+        match obj.emit(&mut result) {
+            Ok(()) => {
+                assert!(result.mmap.is_some(), "no reserve");
+                let mmap = result.mmap.expect("reserve not called");
+                assert_eq!(mmap.len(), result.len);
+                Ok(mmap)
+            }
+            Err(e) => match result.err.take() {
+                Some(original) => Err(original.context(e)),
+                None => Err(e.into()),
+            },
+        }
+    }
+
+    /// "Drains" leading bytes up to the end specified in `range` from this
+    /// `MmapVec`, returning a separately owned `MmapVec` which retains access
+    /// to the bytes.
+    ///
+    /// This method is similar to the `Vec` type's `drain` method, except that
+    /// the return value is not an iterator but rather a new `MmapVec`. The
+    /// purpose of this method is the ability to split-off new `MmapVec` values
+    /// which are sub-slices of the original one.
+    ///
+    /// Once data has been drained from an `MmapVec` it is no longer accessible
+    /// from the original `MmapVec`, it's only accessible from the returned
+    /// `MmapVec`. In other words ownership of the drain'd bytes is returned
+    /// through the `MmapVec` return value.
+    ///
+    /// This `MmapVec` will shrink by `range.end` bytes, and it will only refer
+    /// to the bytes that come after the drain range.
+    ///
+    /// This is an `O(1)` operation which does not involve copies.
+    pub fn drain(&mut self, range: RangeTo<usize>) -> MmapVec {
+        let amt = range.end;
+        assert!(amt <= (self.range.end - self.range.start));
+
+        // Create a new `MmapVec` which refers to the same underlying mmap, but
+        // has a disjoint range from ours. Our own range is adjusted to be
+        // disjoint just after `ret` is created.
+        let ret = MmapVec {
+            mmap: self.mmap.clone(),
+            range: self.range.start..self.range.start + amt,
+        };
+        self.range.start += amt;
+        return ret;
+    }
+}
+
+impl Deref for MmapVec {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        &self.mmap.as_slice()[self.range.clone()]
+    }
+}
+
+impl DerefMut for MmapVec {
+    fn deref_mut(&mut self) -> &mut [u8] {
+        // SAFETY: The underlying mmap is protected behind an `Arc` which means
+        // there can be many references to it. We are guaranteed, though,
+        // that each reference to the underlying `mmap` has a disjoint `range`
+        // listed that it can access. This means that despite having shared
+        // access to the mmap itself we have exclusive ownership of the bytes
+        // specified in `self.range`. This should allow us to safely hand out
+        // mutable access to these bytes if so desired.
+        unsafe {
+            let slice = std::slice::from_raw_parts_mut(self.mmap.as_mut_ptr(), self.mmap.len());
+            &mut slice[self.range.clone()]
+        }
+    }
+}
+
+/// Helper struct to implement the `WritableBuffer` trait from the `object`
+/// crate.
+///
+/// This enables writing an object directly into an mmap'd memory so it's
+/// immediately usable for execution after compilation. This implementation
+/// relies on a call to `reserve` happening once up front with all the needed
+/// data, and the mmap internally does not attempt to grow afterwards.
+#[derive(Default)]
+struct ObjectMmap {
+    mmap: Option<MmapVec>,
+    len: usize,
+    err: Option<Error>,
+}
+
+impl WritableBuffer for ObjectMmap {
+    fn len(&self) -> usize {
+        self.len
+    }
+
+    fn reserve(&mut self, additional: usize) -> Result<(), ()> {
+        assert!(self.mmap.is_none(), "cannot reserve twice");
+        self.mmap = match MmapVec::with_capacity(additional) {
+            Ok(mmap) => Some(mmap),
+            Err(e) => {
+                self.err = Some(e);
+                return Err(());
+            }
+        };
+        Ok(())
+    }
+
+    fn resize(&mut self, new_len: usize, value: u8) {
+        if new_len <= self.len {
+            return;
+        }
+        let mmap = self.mmap.as_mut().expect("write before reserve");
+
+        // new mmaps are automatically filled with zeros, so if we're asked to
+        // fill with zeros then we can skip the actual fill step.
+        if value != 0 {
+            mmap[self.len..][..new_len - self.len].fill(value);
+        }
+        self.len = new_len;
+    }
+
+    fn write_bytes(&mut self, val: &[u8]) {
+        let mmap = self.mmap.as_mut().expect("write before reserve");
+        mmap[self.len..][..val.len()].copy_from_slice(val);
+        self.len += val.len();
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::MmapVec;
+
+    #[test]
+    fn smoke() {
+        let mut mmap = MmapVec::with_capacity(10).unwrap();
+        assert_eq!(mmap.len(), 10);
+        assert_eq!(&mmap[..], &[0; 10]);
+
+        mmap[0] = 1;
+        mmap[2] = 3;
+        assert!(mmap.get(10).is_none());
+        assert_eq!(mmap[0], 1);
+        assert_eq!(mmap[2], 3);
+    }
+
+    #[test]
+    fn drain() {
+        let mut mmap = MmapVec::from_slice(&[1, 2, 3, 4]).unwrap();
+        assert_eq!(mmap.len(), 4);
+        assert!(mmap.drain(..0).is_empty());
+        assert_eq!(mmap.len(), 4);
+        let one = mmap.drain(..1);
+        assert_eq!(one.len(), 1);
+        assert_eq!(one[0], 1);
+        assert_eq!(mmap.len(), 3);
+        assert_eq!(&mmap[..], &[2, 3, 4]);
+        drop(one);
+        assert_eq!(mmap.len(), 3);
+
+        let two = mmap.drain(..2);
+        assert_eq!(two.len(), 2);
+        assert_eq!(two[0], 2);
+        assert_eq!(two[1], 3);
+        assert_eq!(mmap.len(), 1);
+        assert_eq!(mmap[0], 4);
+        drop(two);
+        assert!(mmap.drain(..0).is_empty());
+        assert!(mmap.drain(..1).len() == 1);
+        assert!(mmap.is_empty());
+        assert!(mmap.drain(..0).is_empty());
+    }
+}
diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml
index 76891f9361..006b65bcc5 100644
--- a/crates/wasmtime/Cargo.toml
+++ b/crates/wasmtime/Cargo.toml
@@ -38,6 +38,7 @@ paste = "1.0.3"
 psm = "0.1.11"
 lazy_static = "1.4"
 rayon = { version = "1.0", optional = true }
+object = { version = "0.26", default-features = false, features = ['read_core', 'elf'] }
 
 [target.'cfg(target_os = "windows")'.dependencies]
 winapi = "0.3.7"
diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs
index 9a64658237..7cd7348d16 100644
--- a/crates/wasmtime/src/module.rs
+++ b/crates/wasmtime/src/module.rs
@@ -10,7 +10,7 @@ use std::path::Path;
 use std::sync::Arc;
 use wasmparser::Validator;
 use wasmtime_environ::{ModuleEnvironment, ModuleIndex, PrimaryMap};
-use wasmtime_jit::{CompilationArtifacts, CompiledModule, CompiledModuleInfo, TypeTables};
+use wasmtime_jit::{CompiledModule, CompiledModuleInfo, MmapVec, TypeTables};
 
 mod registry;
 mod serialization;
@@ -299,21 +299,44 @@ impl Module {
 
         cfg_if::cfg_if! {
             if #[cfg(feature = "cache")] {
+                let state = (HashedEngineCompileEnv(engine), binary);
                 let (main_module, artifacts, types) = wasmtime_cache::ModuleCacheEntry::new(
                     "wasmtime",
                     engine.cache_config(),
                 )
-                .get_data((HashedEngineCompileEnv(engine), binary), |(engine, binary)| {
-                    Module::build_artifacts(engine.0, binary)
-                })?;
+                .get_data_raw(
+                    &state,
+
+                    // Cache miss, compute the actual artifacts
+                    |(engine, wasm)| Module::build_artifacts(engine.0, wasm),
+
+                    // Implementation of how to serialize artifacts
+                    |(engine, _wasm), (_, artifacts, types)| {
+                        SerializedModule::from_artifacts(
+                            engine.0,
+                            artifacts.iter().map(|p| &p.0),
+                            types,
+                        ).to_bytes().ok()
+                    },
+
+                    // Cache hit, deserialize the provided artifacts
+                    |(engine, _wasm), serialized_bytes| {
+                        let (i, m, t, upvars) = SerializedModule::from_bytes(&serialized_bytes, true)
+                            .ok()?
+                            .into_parts(engine.0)
+                            .ok()?;
+                        // This upvars list is always empty for top-level modules
+                        assert!(upvars.is_empty());
+                        Some((i, m, t))
+                    },
+                )?;
             } else {
-                let (main_module, artifacts, types) =
-                    Module::build_artifacts(engine, binary)?;
+                let (main_module, artifacts, types) = Module::build_artifacts(engine, binary)?;
             }
         };
 
-        let modules = engine.run_maybe_parallel(artifacts, |(a, i)| {
-            CompiledModule::from_artifacts(a, Some(i), &*engine.config().profiler)
+        let modules = engine.run_maybe_parallel(artifacts, |(a, b)| {
+            CompiledModule::from_artifacts(a, b, &*engine.config().profiler)
         })?;
 
         Self::from_parts(engine, modules, main_module, Arc::new(types), &[])
@@ -329,9 +352,10 @@ impl Module {
     /// * The index into the second field of the "main module". The "main
     ///   module" in this case is the outermost module described by the `wasm`
     ///   input, and is here for the module linking proposal.
-    /// * A list of `CompilationArtifacts` for each module found within `wasm`.
+    /// * A list of compilation artifacts for each module found within `wasm`.
     ///   Note that if module linking is disabled then this list will always
-    ///   have a size of exactly 1.
+    ///   have a size of exactly 1. These pairs are returned by
+    ///   `wasmtime_jit::finish_compile`.
     /// * Type information about all the modules returned. All returned modules
     ///   have local type information with indices that refer to these returned
     ///   tables.
@@ -341,7 +365,7 @@ impl Module {
         wasm: &[u8],
     ) -> Result<(
         usize,
-        Vec<(CompilationArtifacts, CompiledModuleInfo)>,
+        Vec<(MmapVec, Option<CompiledModuleInfo>)>,
         TypeTables,
     )> {
         let tunables = &engine.config().tunables;
@@ -388,12 +412,8 @@ impl Module {
                 translation.try_paged_init();
             }
 
-            Ok(CompilationArtifacts::new(
-                translation,
-                obj,
-                funcs,
-                tunables,
-            )?)
+            let (mmap, info) = wasmtime_jit::finish_compile(translation, obj, funcs, tunables)?;
+            Ok((mmap, Some(info)))
         })?;
 
         Ok((
diff --git a/crates/wasmtime/src/module/serialization.rs b/crates/wasmtime/src/module/serialization.rs
index 7c9fa725a9..128ff13f13 100644
--- a/crates/wasmtime/src/module/serialization.rs
+++ b/crates/wasmtime/src/module/serialization.rs
@@ -1,28 +1,67 @@
 //! Implements module serialization.
+//!
+//! This module implements the serialization format for `wasmtime::Module`.
+//! This includes both the binary format of the final artifact as well as
+//! validation on ingestion of artifacts.
+//!
+//! There are two main pieces of data associated with a binary artifact:
+//!
+//! 1. A list of compiled modules. The reason this is a list as opposed to one
+//!    singular module is that a module-linking module may encompass a number
+//!    of other modules.
+//! 2. Compilation metadata shared by all modules, including the global
+//!    `TypeTables` information. This metadata is validated for compilation
+//!    settings and also has information shared by all modules (such as the
+//!    shared `TypeTables`).
+//!
+//! Compiled modules are, at this time, represented as an ELF file. This ELF
+//! file contains all the necessary data needed to decode each individual
+//! module, and conveniently also handles things like alignment so we can
+//! actually directly `mmap` compilation artifacts from disk.
+//!
+//! With all this in mind, the current serialization format is as follows:
+//!
+//! * The first, primary, module starts the final artifact. This means that the
+//!   final artifact is actually, and conveniently, a valid ELF file. ELF files
+//!   don't place any restrictions on data coming after the ELF file itself,
+//!   so that's where everything else will go. Another reason for using this
+//!   format is that our compilation artifacts are then consumable by standard
+//!   debugging tools like `objdump` to poke around and see what's what.
+//!
+//! * Next, all other modules are encoded. Each module has its own alignment,
+//!   though, so modules aren't simply concatenated. Instead, directly after an
+//!   ELF file there is a 64-bit little-endian integer which is the offset,
+//!   from the start of the whole artifact, at which the next ELF file begins.
+//!
+//! * Finally, once all modules have been encoded (there's always at least
+//!   one), the 8-byte value `u64::MAX` is encoded. Following this is a
+//!   number of fields:
+//!
+//!   1. The `HEADER` value
+//!   2. A byte indicating how long the next field is
+//!   3. A version string of the length of the previous byte value
+//!   4. A `bincode`-encoded `Metadata` structure.
+//!
+//!   This is intended to make Wasmtime-based ELF files easy to distinguish
+//!   from other random ELF files, as well as to provide better error messages
+//!   when using wasmtime artifacts across versions.
+//!
+//! This format is implemented by the `to_bytes` and `from_mmap` functions.
 
 use crate::{Engine, Module};
 use anyhow::{anyhow, bail, Context, Result};
-use bincode::Options;
+use object::read::elf::FileHeader;
+use object::{Bytes, File, Object, ObjectSection};
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
+use std::convert::TryFrom;
 use std::str::FromStr;
 use std::sync::Arc;
 use wasmtime_environ::{Compiler, FlagValue, Tunables};
-use wasmtime_jit::{CompilationArtifacts, CompiledModule, TypeTables};
+use wasmtime_jit::{subslice_range, CompiledModule, CompiledModuleInfo, MmapVec, TypeTables};
 
 const HEADER: &[u8] = b"\0wasmtime-aot";
 
-fn bincode_options() -> impl Options {
-    // Use a variable-length integer encoding instead of fixed length. The
-    // module shown on #2318 gets compressed from ~160MB to ~110MB simply using
-    // this, presumably because there's a lot of 8-byte integers which generally
-    // have small values. Local testing shows that the deserialization
-    // performance, while higher, is in the few-percent range. For huge size
-    // savings this seems worthwhile to lose a small percentage of
-    // deserialization performance.
-    bincode::DefaultOptions::new().with_varint_encoding()
-}
-
 // This exists because `wasmparser::WasmFeatures` isn't serializable
 #[derive(Debug, Copy, Clone, Serialize, Deserialize)]
 struct WasmFeatures {
@@ -78,6 +117,12 @@ enum MyCow<'a, T> {
 }
 
 impl<'a, T> MyCow<'a, T> {
+    fn as_ref(&self) -> &T {
+        match self {
+            MyCow::Owned(val) => val,
+            MyCow::Borrowed(val) => val,
+        }
+    }
     fn unwrap_owned(self) -> T {
         match self {
             MyCow::Owned(val) => val,
@@ -149,14 +194,18 @@ impl SerializedModuleUpvar {
     }
 }
 
-#[derive(Serialize, Deserialize)]
 pub struct SerializedModule<'a> {
+    artifacts: Vec<MyCow<'a, MmapVec>>,
+    metadata: Metadata<'a>,
+}
+
+#[derive(Serialize, Deserialize)]
+struct Metadata<'a> {
     target: String,
     shared_flags: BTreeMap<String, FlagValue>,
     isa_flags: BTreeMap<String, FlagValue>,
     tunables: Tunables,
     features: WasmFeatures,
-    artifacts: Vec<MyCow<'a, CompilationArtifacts>>,
     module_upvars: Vec<SerializedModuleUpvar>,
     types: MyCow<'a, TypeTables>,
 }
@@ -168,10 +217,8 @@ impl<'a> SerializedModule<'a> {
             .inner
             .artifact_upvars
             .iter()
-            .map(|m| MyCow::Borrowed(m.compilation_artifacts()))
-            .chain(Some(MyCow::Borrowed(
-                module.inner.module.compilation_artifacts(),
-            )))
+            .map(|m| MyCow::Borrowed(m.mmap()))
+            .chain(Some(MyCow::Borrowed(module.inner.module.mmap())))
             .collect::<Vec<_>>();
         let module_upvars = module
             .inner
@@ -191,12 +238,12 @@ impl<'a> SerializedModule<'a> {
     #[cfg(compiler)]
     pub fn from_artifacts(
         engine: &Engine,
-        artifacts: &'a Vec<CompilationArtifacts>,
+        artifacts: impl IntoIterator<Item = &'a MmapVec>,
         types: &'a TypeTables,
     ) -> Self {
         Self::with_data(
             engine,
-            artifacts.iter().map(MyCow::Borrowed).collect(),
+            artifacts.into_iter().map(MyCow::Borrowed).collect(),
             Vec::new(),
             MyCow::Borrowed(types),
         )
@@ -205,23 +252,42 @@ impl<'a> SerializedModule<'a> {
     #[cfg(compiler)]
     fn with_data(
         engine: &Engine,
-        artifacts: Vec<MyCow<'a, CompilationArtifacts>>,
+        artifacts: Vec<MyCow<'a, MmapVec>>,
         module_upvars: Vec<SerializedModuleUpvar>,
         types: MyCow<'a, TypeTables>,
     ) -> Self {
         Self {
-            target: engine.compiler().triple().to_string(),
-            shared_flags: engine.compiler().flags(),
-            isa_flags: engine.compiler().isa_flags(),
-            tunables: engine.config().tunables.clone(),
-            features: (&engine.config().features).into(),
             artifacts,
-            module_upvars,
-            types,
+            metadata: Metadata {
+                target: engine.compiler().triple().to_string(),
+                shared_flags: engine.compiler().flags(),
+                isa_flags: engine.compiler().isa_flags(),
+                tunables: engine.config().tunables.clone(),
+                features: (&engine.config().features).into(),
+                module_upvars,
+                types,
+            },
         }
     }
 
-    pub fn into_module(mut self, engine: &Engine) -> Result<Module> {
+    pub fn into_module(self, engine: &Engine) -> Result<Module> {
+        let (main_module, modules, types, upvars) = self.into_parts(engine)?;
+        let modules = engine.run_maybe_parallel(modules, |(i, m)| {
+            CompiledModule::from_artifacts(i, m, &*engine.config().profiler)
+        })?;
+
+        Module::from_parts(engine, modules, main_module, Arc::new(types), &upvars)
+    }
+
+    pub fn into_parts(
+        mut self,
+        engine: &Engine,
+    ) -> Result<(
+        usize,
+        Vec<(MmapVec, Option<CompiledModuleInfo>)>,
+        TypeTables,
+        Vec<SerializedModuleUpvar>,
+    )> {
         // Verify that the module we're loading matches the triple that `engine`
         // is configured for. If compilation is disabled within engine then the
         // assumed triple is the host itself.
@@ -245,64 +311,106 @@ impl<'a> SerializedModule<'a> {
         self.check_tunables(&engine.config().tunables)?;
         self.check_features(&engine.config().features)?;
 
-        let modules = engine.run_maybe_parallel(self.artifacts, |i| {
-            CompiledModule::from_artifacts(i.unwrap_owned(), None, &*engine.config().profiler)
-        })?;
-
-        assert!(!modules.is_empty());
+        assert!(!self.artifacts.is_empty());
+        let modules = self.artifacts.into_iter().map(|i| (i.unwrap_owned(), None));
 
         let main_module = modules.len() - 1;
 
-        Module::from_parts(
-            engine,
-            modules,
+        Ok((
             main_module,
-            Arc::new(self.types.unwrap_owned()),
-            &self.module_upvars,
-        )
+            modules.collect(),
+            self.metadata.types.unwrap_owned(),
+            self.metadata.module_upvars,
+        ))
     }
 
     pub fn to_bytes(&self) -> Result<Vec<u8>> {
-        use std::io::Write;
+        // First up, create a linked-ish list of ELF files. For more
+        // information on this format, see the doc comment on this module.
+        // The only semi-tricky bit here is that we leave an
+        // offset-to-the-next-file between each set of ELF files. The list
+        // is then terminated with `u64::MAX`.
+        let mut ret = Vec::new();
+        for (i, obj) in self.artifacts.iter().enumerate() {
+            // Anything after the first object needs to respect the alignment of
+            // the object's sections, so insert padding as necessary. Note that
+            // the +8 to the length here is to accommodate the size we'll write
+            // to get to the next object.
+            if i > 0 {
+                let obj = File::parse(&obj.as_ref()[..])?;
+                let align = obj.sections().map(|s| s.align()).max().unwrap_or(0).max(1);
+                let align = usize::try_from(align).unwrap();
+                let new_size = align_to(ret.len() + 8, align);
+                ret.extend_from_slice(&(new_size as u64).to_le_bytes());
+                ret.resize(new_size, 0);
+            }
+            ret.extend_from_slice(obj.as_ref());
+        }
+        ret.extend_from_slice(&[0xff; 8]);
 
-        let mut bytes = Vec::new();
-
-        bytes.write_all(HEADER)?;
-
-        // Preface the data with a version so we can do a version check independent
-        // of the serialized data.
+        // The last part of our artifact is the bincode-encoded `Metadata`
+        // section with a few other guards to help give better error messages.
+        ret.extend_from_slice(HEADER);
         let version = env!("CARGO_PKG_VERSION");
         assert!(
             version.len() < 256,
             "package version must be less than 256 bytes"
         );
-        bytes.write(&[version.len() as u8])?;
+        ret.push(version.len() as u8);
+        ret.extend_from_slice(version.as_bytes());
+        bincode::serialize_into(&mut ret, &self.metadata)?;
 
-        bytes.write_all(version.as_bytes())?;
-
-        bincode_options().serialize_into(&mut bytes, self)?;
-
-        Ok(bytes)
+        Ok(ret)
     }
 
     pub fn from_bytes(bytes: &[u8], check_version: bool) -> Result<Self> {
-        if !bytes.starts_with(HEADER) {
-            bail!("bytes are not a compatible serialized wasmtime module");
-        }
+        Self::from_mmap(MmapVec::from_slice(bytes)?, check_version)
+    }
 
-        let bytes = &bytes[HEADER.len()..];
+    pub fn from_mmap(mut mmap: MmapVec, check_version: bool) -> Result<Self> {
+        // Artifacts always start with an ELF file, so read that first.
+        // Afterwards we continually read ELF files until we see the `u64::MAX`
+        // marker, meaning we've reached the end.
+        let first_module = read_file(&mut mmap)?;
+        let mut pos = first_module.len();
+        let mut artifacts = vec![MyCow::Owned(first_module)];
 
-        if bytes.is_empty() {
+        let metadata = loop {
+            if mmap.len() < 8 {
+                bail!("invalid serialized data");
+            }
+            let next_file_start = u64::from_le_bytes([
+                mmap[0], mmap[1], mmap[2], mmap[3], mmap[4], mmap[5], mmap[6], mmap[7],
+            ]);
+            if next_file_start == u64::MAX {
+                mmap.drain(..8);
+                break mmap;
+            }
+
+            // Remove padding leading up to the next file
+            let next_file_start = usize::try_from(next_file_start).unwrap();
+            let _padding = mmap.drain(..next_file_start - pos);
+            let data = read_file(&mut mmap)?;
+            pos = next_file_start + data.len();
+            artifacts.push(MyCow::Owned(data));
+        };
+
+        // Once we've reached the end we parse a `Metadata` object. This has a
+        // few guards up front which we process first, and eventually this
+        // bottoms out in a `bincode::deserialize` call.
+        let metadata = metadata
+            .strip_prefix(HEADER)
+            .ok_or_else(|| anyhow!("bytes are not a compatible serialized wasmtime module"))?;
+        if metadata.is_empty() {
             bail!("serialized data data is empty");
         }
-
-        let version_len = bytes[0] as usize;
-        if bytes.len() < version_len + 1 {
+        let version_len = metadata[0] as usize;
+        if metadata.len() < version_len + 1 {
             bail!("serialized data is malformed");
         }
 
         if check_version {
-            let version = std::str::from_utf8(&bytes[1..1 + version_len])?;
+            let version = std::str::from_utf8(&metadata[1..1 + version_len])?;
             if version != env!("CARGO_PKG_VERSION") {
                 bail!(
                     "Module was compiled with incompatible Wasmtime version '{}'",
@@ -311,13 +419,47 @@ impl<'a> SerializedModule<'a> {
             }
         }
 
-        Ok(bincode_options()
-            .deserialize::<SerializedModule<'_>>(&bytes[1 + version_len..])
-            .context("deserialize compilation artifacts")?)
+        let metadata = bincode::deserialize::<Metadata>(&metadata[1 + version_len..])
+            .context("deserialize compilation artifacts")?;
+
+        return Ok(SerializedModule {
+            artifacts,
+            metadata,
+        });
+
+        /// This function will drain the beginning contents of `mmap` which
+        /// correspond to an ELF object file. The ELF file is only very lightly
+        /// validated.
+        ///
+        /// The `mmap` passed in will be reset to just after the ELF file, and
+        /// the `MmapVec` returned represents the extent of the ELF file
+        /// itself.
+        fn read_file(mmap: &mut MmapVec) -> Result<MmapVec> {
+            use object::NativeEndian as NE;
+            // There's not actually a great utility for figuring out where
+            // the end of an ELF file is in the `object` crate. In lieu of that
+            // we build our own which leverages the format of ELF files, which
+            // is that the header comes first, that tells us where the section
+            // headers are, and for our ELF files the end of the file is the
+            // end of the section headers.
+            let mut bytes = Bytes(mmap);
+            let header = bytes
+                .read::<object::elf::FileHeader64<NE>>()
+                .map_err(|()| anyhow!("artifact truncated, can't read header"))?;
+            if !header.is_supported() {
+                bail!("invalid elf header");
+            }
+            let sections = header
+                .section_headers(NE, &mmap[..])
+                .context("failed to read section headers")?;
+            let range = subslice_range(object::bytes_of_slice(sections), mmap);
+            Ok(mmap.drain(..range.end))
+        }
     }
 
     fn check_triple(&self, other: &target_lexicon::Triple) -> Result<()> {
-        let triple = target_lexicon::Triple::from_str(&self.target).map_err(|e| anyhow!(e))?;
+        let triple =
+            target_lexicon::Triple::from_str(&self.metadata.target).map_err(|e| anyhow!(e))?;
 
         if triple.architecture != other.architecture {
             bail!(
@@ -337,7 +479,7 @@ impl<'a> SerializedModule<'a> {
     }
 
     fn check_shared_flags(&mut self, compiler: &dyn Compiler) -> Result<()> {
-        let mut shared_flags = std::mem::take(&mut self.shared_flags);
+        let mut shared_flags = std::mem::take(&mut self.metadata.shared_flags);
         for (name, host) in compiler.flags() {
             match shared_flags.remove(&name) {
                 Some(v) => {
@@ -360,7 +502,7 @@ impl<'a> SerializedModule<'a> {
     }
 
     fn check_isa_flags(&mut self, compiler: &dyn Compiler) -> Result<()> {
-        let mut isa_flags = std::mem::take(&mut self.isa_flags);
+        let mut isa_flags = std::mem::take(&mut self.metadata.isa_flags);
         for (name, host) in compiler.isa_flags() {
             match isa_flags.remove(&name) {
                 Some(v) => match (&v, &host) {
@@ -432,7 +574,7 @@ impl<'a> SerializedModule<'a> {
 
             // This doesn't affect compilation, it's just a runtime setting.
             dynamic_memory_growth_reserve: _,
-        } = self.tunables;
+        } = self.metadata.tunables;
 
         Self::check_int(
             static_memory_bound,
@@ -488,7 +630,7 @@ impl<'a> SerializedModule<'a> {
             multi_memory,
             exceptions,
             memory64,
-        } = self.features;
+        } = self.metadata.features;
 
         Self::check_bool(
             reference_types,
@@ -538,6 +680,12 @@ impl<'a> SerializedModule<'a> {
     }
 }
 
+/// Aligns the `val` specified up to `align`, which must be a power of two
+fn align_to(val: usize, align: usize) -> usize {
+    debug_assert!(align.is_power_of_two());
+    (val + (align - 1)) & (!(align - 1))
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -550,7 +698,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.target = "unknown-generic-linux".to_string();
+        serialized.metadata.target = "unknown-generic-linux".to_string();
 
         match serialized.into_module(&engine) {
             Ok(_) => unreachable!(),
@@ -569,7 +717,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.target = format!(
+        serialized.metadata.target = format!(
             "{}-generic-unknown",
             target_lexicon::Triple::host().architecture
         );
@@ -591,7 +739,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.shared_flags.insert(
+        serialized.metadata.shared_flags.insert(
             "opt_level".to_string(),
             FlagValue::Enum(Cow::Borrowed("none")),
         );
@@ -615,6 +763,7 @@ mod test {
         let mut serialized = SerializedModule::new(&module);
 
         serialized
+            .metadata
             .isa_flags
             .insert("not_a_flag".to_string(), FlagValue::Bool(true));
 
@@ -636,7 +785,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.strategy = CompilationStrategy::Lightbeam;
+        serialized.metadata.strategy = CompilationStrategy::Lightbeam;
 
         match serialized.into_module(&engine) {
             Ok(_) => unreachable!(),
@@ -655,7 +804,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.tunables.static_memory_offset_guard_size = 0;
+        serialized.metadata.tunables.static_memory_offset_guard_size = 0;
 
         match serialized.into_module(&engine) {
             Ok(_) => unreachable!(),
@@ -674,7 +823,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.tunables.interruptable = false;
+        serialized.metadata.tunables.interruptable = false;
 
         match serialized.into_module(&engine) {
             Ok(_) => unreachable!(),
@@ -691,7 +840,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.tunables.interruptable = true;
+        serialized.metadata.tunables.interruptable = true;
 
         match serialized.into_module(&engine) {
             Ok(_) => unreachable!(),
@@ -713,7 +862,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.features.simd = false;
+        serialized.metadata.features.simd = false;
 
         match serialized.into_module(&engine) {
             Ok(_) => unreachable!(),
@@ -727,7 +876,7 @@ mod test {
         let module = Module::new(&engine, "(module)")?;
 
         let mut serialized = SerializedModule::new(&module);
-        serialized.features.simd = true;
+        serialized.metadata.features.simd = true;
 
         match serialized.into_module(&engine) {
             Ok(_) => unreachable!(),
diff --git a/tests/all/module_serialize.rs b/tests/all/module_serialize.rs
index e444487a9e..3c67ebf99f 100644
--- a/tests/all/module_serialize.rs
+++ b/tests/all/module_serialize.rs
@@ -15,7 +15,9 @@ unsafe fn deserialize_and_instantiate(store: &mut Store<()>, buffer: &[u8]) -> R
 fn test_version_mismatch() -> Result<()> {
     let engine = Engine::default();
     let mut buffer = serialize(&engine, "(module)")?;
-    buffer[13 /* header length */ + 1 /* version length */] = 'x' as u8;
+    const HEADER: &[u8] = b"\0wasmtime-aot";
+    let pos = memchr::memmem::rfind_iter(&buffer, HEADER).next().unwrap();
+    buffer[pos + HEADER.len() + 1 /* version length */] = 'x' as u8;
 
     match unsafe { Module::deserialize(&engine, &buffer) } {
         Ok(_) => bail!("expected deserialization to fail"),