Update memfd image construction to avoid excessively large images (#3819)

* Update memfd image construction to avoid excessively large images

Previously memfd-based image construction had a hard limit of a 1GB
memory image, but this meant that even a tiny wasm module could cause
up to 1GB of allocation, which became excessive, especially in terms of
memory usage during fuzzing. To fix this, the conversion to a static
memory image has been updated to first perform a conversion to paged
memory initialization, which is sparse, followed by a second conversion
to static memory initialization, as sketched below.
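
As a distilled view (mirroring the `try_static_init` changes in the
diff below), the conversion now starts by switching to the sparse paged
form and bails out if that isn't possible:

    self.try_paged_init();
    let map = match &mut self.module.memory_initialization {
        MemoryInitialization::Paged { map } => map,
        // Still `Segmented`; static initialization isn't applicable.
        _ => return,
    };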

The sparse construction of the paged step makes the upper/lower bounds
of the initialization image easy to compute, and that range can then be
checked against some heuristics to determine whether we're willing to
commit to building up a whole static image for the module. The
heuristics have been changed from "must be less than 1GB" to requiring
that one of the following two conditions holds (see the sketch after
this list):

* Either the total memory image size is at most twice the size of the
  original paged data itself.

* Otherwise the memory image size must be smaller than a reasonable
  threshold, currently 1MB.
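
As a minimal sketch of this heuristic (the constant name matches the
diff below; `static_init_ok` itself is a hypothetical helper for
illustration only):

    /// Whether building a static image is considered affordable.
    /// `image_size` is the byte span from the first initialized page
    /// to the end of the last one; `data_size` is the sum of all page
    /// data lengths.
    fn static_init_ok(image_size: u64, data_size: u64) -> bool {
        const MAX_IMAGE_SIZE_ALWAYS_ALLOWED: u64 = 1 << 20; // 1 MB

        // Image construction at most doubles memory consumption...
        image_size < data_size.saturating_mul(2)
            // ...or the image is small enough to always be allowed.
            || image_size < MAX_IMAGE_SIZE_ALWAYS_ALLOWED
    }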

We'll likely need to tweak these heuristics over time, and it's still
possible to cause a lot of extra memory consumption, but for now this
should be enough to appease the fuzzers.

Closes #3815

* Review comments
Alex Crichton
2022-02-17 10:37:17 -06:00
committed by GitHub
parent 9c3d5c7b9f
commit b62fe21914

@@ -6,6 +6,7 @@ use indexmap::IndexMap;
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
 use std::convert::TryFrom;
+use std::mem;
 use std::ops::Range;
 use wasmtime_types::*;
@@ -297,6 +298,11 @@ impl ModuleTranslation<'_> {
         for (memory, pages) in page_contents {
             let mut page_offsets = Vec::with_capacity(pages.len());
             for (page_index, page) in pages {
+                // Skip entirely zero pages since they don't need to participate
+                // in initialization.
+                if page.iter().all(|b| *b == 0) {
+                    continue;
+                }
                 let end = offset + (page.len() as u32);
                 page_offsets.push(StaticMemoryInitializer {
                     offset: page_index * u64::from(WASM_PAGE_SIZE),
@@ -317,147 +323,125 @@ impl ModuleTranslation<'_> {
     /// Note that the constraints for `Paged` are the same as those for
     /// `Static`.
     pub fn try_static_init(&mut self, page_size: u64) {
-        const MAX_IMAGE_SIZE: usize = 1024 * 1024 * 1024; // limit to 1GiB.
-
-        // This method only attempts to transform a `Segmented` memory init
-        // into a `Static` one, no other state.
-        if !self.module.memory_initialization.is_segmented() {
-            return;
-        }
-
-        let page_align = |x: u64| x & !(page_size - 1);
-        let page_align_up = |x: u64| page_align(x + page_size - 1);
-
-        // First build up an in-memory image for each memory. This in-memory
-        // representation is discarded if the memory initializers aren't "of
-        // the right shape" where the desired shape is:
-        //
-        // * Only initializers for defined memories.
-        // * Only initializers with static offsets (no globals).
-        // * Only in-bound initializers.
-        //
-        // The `init_memory` method of `MemoryInitialization` is used here to
-        // do most of the validation for us, and otherwise the data chunks are
-        // collected into the `images` array here.
-        let mut images: PrimaryMap<MemoryIndex, Vec<u8>> =
-            PrimaryMap::with_capacity(self.module.memory_plans.len());
-        for _ in 0..self.module.memory_plans.len() {
-            images.push(Vec::new());
-        }
-        let mut data = self.data.iter();
-        let ok = self.module.memory_initialization.init_memory(
-            InitMemory::CompileTime(&self.module),
-            &mut |memory, init| {
-                let data = data.next().unwrap();
-                assert_eq!(data.len(), init.data.len());
-                // Static initialization with only one memcpy is only possible
-                // for defined memories which have a known-starting-as-zero
-                // state to account for holes between data segments. This means
-                // that if this is an imported memory then static memory
-                // initialization isn't possible.
-                if self.module.defined_memory_index(memory).is_none() {
-                    return false;
-                }
-
-                // Splat the `data_range` into the `image` for this memory,
-                // updating it as necessary with 0s for holes and such.
-                let image = &mut images[memory];
-                let offset = usize::try_from(init.offset).unwrap();
-                let new_image_len = offset + data.len();
-                if image.len() < new_image_len {
-                    if new_image_len > MAX_IMAGE_SIZE {
-                        return false;
-                    }
-                    image.resize(new_image_len, 0);
-                }
-                image[offset..][..data.len()].copy_from_slice(data);
-                true
-            },
-        );
-
-        // If any initializer wasn't applicable then we skip static init
-        // entirely.
-        if !ok {
-            return;
-        }
-
-        // At this point all memories in this module are initialized with
-        // in-bounds initializers which are all statically known to be valid.
-        // This means that the memory `images` built up so far are valid for an
-        // instance of this linear memory. These images are trimmed of their
-        // leading and trailing zeros and then `self.data` is re-populated with
-        // new data.
-        self.data.clear();
-        assert!(self.data_align.is_none());
-        self.data_align = Some(page_size);
-        let mut map = PrimaryMap::with_capacity(images.len());
-        let mut offset = 0u32;
-        for (memory, mut image) in images {
-            // Find the first nonzero byte, and if all the bytes are zero then
-            // we can skip initialization of this memory entirely since memories
-            // otherwise start with all zero bytes.
-            let nonzero_start = match image.iter().position(|b| *b != 0) {
-                Some(i) => i as u64,
-                None => {
-                    map.push(None);
-                    continue;
-                }
-            };
-
-            // Find the last nonzero byte, which must exist at this point since
-            // we found one going forward. Add one to find the index of the
-            // last zero, which may also be the length of the image.
-            let nonzero_end = image.iter().rposition(|b| *b != 0).unwrap() as u64 + 1;
-
-            // The offset and length of this image are now page-aligned. This
-            // isn't strictly required for a runtime-compiled module which is
-            // never persisted to disk. The purpose of doing this, however, is
-            // to make it possible to mmap the in-memory image, if persisted to
-            // disk, into an address space at a future date to enable efficient
-            // initialization of memory. Mapping a file into the address space
-            // requires a page-aligned offset in the file as well as a
-            // page-aligned length, so the offset/length are aligned here.
-            //
-            // Note that in the future if we can distinguish between modules
-            // only ever used at runtime and those persisted to disk and used
-            // later then it's possible to pass a page size parameter to this
-            // function as "1" which means we won't pad the data with extra
-            // zeros or anything like that.
-            let image_offset = page_align(nonzero_start);
-            let image_len = page_align_up(nonzero_end - image_offset);
-            assert_eq!(image_offset % page_size, 0);
-            assert_eq!(image_len % page_size, 0);
-
-            // Drop the leading zero bytes on the image, and then also set the
-            // length of the image to the specified length. Note that this may
-            // truncate trailing zeros from the image or it may extend the image
-            // to be page-aligned with zeros.
-            image.drain(..image_offset as usize);
-            image.resize(image_len as usize, 0);
-            self.data.push(image.into());
-
-            // Record how this image is initialized, where `image_offset` is the
-            // offset from the start of linear memory and the length is added to
-            // the `offset` variable outside of this loop which keeps track of
-            // the current offset in the data section. This is used to build the
-            // range `offset..end` which is the range, in the concatenation of
-            // all memory images of `self.data`, of what memory is located at
-            // `image_offset`.
-            let end = offset
-                .checked_add(u32::try_from(image_len).unwrap())
-                .unwrap();
-            let idx = map.push(Some(StaticMemoryInitializer {
-                offset: image_offset,
-                data: offset..end,
-            }));
-            assert_eq!(idx, memory);
-            offset = end;
-            assert_eq!(offset % (page_size as u32), 0);
-        }
-
-        self.module.memory_initialization = MemoryInitialization::Static { map };
+        // First try to switch this memory initialization to the `Paged`
+        // variant, if it isn't already. This will perform static bounds checks
+        // and everything and massage it all into a format which is a bit
+        // easier to process here.
+        self.try_paged_init();
+        let map = match &mut self.module.memory_initialization {
+            MemoryInitialization::Paged { map } => map,
+            _ => return,
+        };
+
+        // Maximum size, in bytes, of an initialization image which is
+        // unconditionally allowed regardless of the input module's size and
+        // properties. This is chosen to be modestly small to allow useful
+        // cases to use an initialization image, but not so large that it
+        // would cause a problem if every module ran up to the limit here.
+        const MAX_IMAGE_SIZE_ALWAYS_ALLOWED: u64 = 1 << 20; // 1 MB
+
+        let memory_init_size = |pages: &[StaticMemoryInitializer]| {
+            if pages.len() == 0 {
+                return 0;
+            }
+            let first = &pages[0];
+            let last = &pages[pages.len() - 1];
+            last.offset - first.offset + (last.data.len() as u64)
+        };
+
+        // Perform a check, on all memories, that the memory initialization is
+        // compatible with static memory initialization. The main concern here
+        // is that construction of the memory image shouldn't consume excessive
+        // resources here during compilation. At this point we're already using
+        // paged initialization so we're theoretically using O(data size)
+        // memory already, and we don't want to use excessively more than that
+        // during image construction. Some heuristics are applied here to see
+        // if they're compatible.
+        let mut data = self.data.as_slice();
+        for (_memory_index, pages) in map.iter() {
+            let (memory_data, rest) = data.split_at(pages.len());
+            data = rest;
+
+            // Calculate the total size of data used to initialize this memory
+            // (the sum of all the page sizes), and then also calculate the
+            // actual memory initialization size assuming it's initialized in
+            // one whole chunk in one whole go.
+            let data_size = memory_data.iter().map(|d| d.len()).sum::<usize>() as u64;
+            let memory_init_size = memory_init_size(pages);
+
+            // If the range of memory being initialized is less than twice the
+            // total size of the data itself then it's assumed that static
+            // initialization is ok. This means we'll at most double memory
+            // consumption during the memory image creation process, which is
+            // currently assumed to "probably be ok" but this will likely need
+            // tweaks over time.
+            if memory_init_size < data_size.saturating_mul(2) {
+                continue;
+            }
+
+            // If the memory initialization image is larger than the size of
+            // all data, then we still allow memory initialization if the
+            // image will be of a relatively modest size, such as 1MB here.
+            if memory_init_size < MAX_IMAGE_SIZE_ALWAYS_ALLOWED {
+                continue;
+            }
+
+            // At this point memory initialization is concluded to be too
+            // expensive to do at compile time so it's entirely deferred to
+            // happen at runtime.
+            return;
+        }
+        assert!(data.is_empty());
+
+        // Here's where we've now committed to changing to static memory. The
+        // memory initialization image is built here from the page data and
+        // then it's converted to a single initializer.
+        let data = mem::replace(&mut self.data, Vec::new());
+        let mut data = data.iter();
+        let mut image_map = PrimaryMap::with_capacity(map.len());
+        let mut offset = 0u32;
+        for (memory_index, pages) in map.iter() {
+            // Allocate the memory image and then fill it in with data. Note
+            // that `pages` should be sorted in increasing order of offsets.
+            let capacity = usize::try_from(memory_init_size(pages)).unwrap();
+            let mut image = Vec::with_capacity(capacity);
+            for page in pages {
+                let image_offset = usize::try_from(page.offset - pages[0].offset).unwrap();
+                assert!(image.len() <= image_offset);
+                image.resize(image_offset, 0u8);
+                image.extend_from_slice(data.next().unwrap());
+            }
+            assert_eq!(image.len(), capacity);
+            assert_eq!(image.capacity(), capacity);
+
+            // Convert the `image` to a single `StaticMemoryInitializer` if
+            // it's not empty.
+            let init = if image.len() > 0 {
+                let data_offset = offset;
+                let len = u32::try_from(image.len()).unwrap();
+                let data_offset_end = data_offset.checked_add(len).unwrap();
+                offset += len;
+
+                // Offset/length should always be page-aligned since our pages
+                // are always wasm pages right now, which are 64k, and we
+                // otherwise won't work at all on systems with larger page
+                // sizes.
+                assert!(u64::from(data_offset) % page_size == 0);
+                assert!(u64::from(len) % page_size == 0);
+                self.data.push(image.into());
+                Some(StaticMemoryInitializer {
+                    offset: pages[0].offset,
+                    data: data_offset..data_offset_end,
+                })
+            } else {
+                None
+            };
+            let idx = image_map.push(init);
+            assert_eq!(idx, memory_index);
+        }
+        assert!(data.next().is_none());
+
+        self.data_align = Some(page_size);
+        self.module.memory_initialization = MemoryInitialization::Static { map: image_map };
     }
 
     /// Attempts to convert the module's table initializers to