Remove the Paged memory initialization variant (#4046)

* Remove the `Paged` memory initialization variant

This commit simplifies the `MemoryInitialization` enum by removing the
`Paged` variant. The `Paged` variant was originally added for uffd support,
but that support was removed in #4040. The variant is no longer necessary on
its own, but it was still used as an intermediate step in producing the
`Static` variant of initialized memory (which copy-on-write uses). As a
result this commit largely reworks the static-memory-initialization steps and
folds the two methods together; the resulting shape of the enum is sketched
below, after the commit metadata.

* Apply suggestions from code review

Co-authored-by: Peter Huene <peter@huene.dev>

Authored by Alex Crichton on 2022-05-05 09:44:48 -05:00; committed by GitHub
parent 5c3642fcb1
commit 7fdc616368
3 changed files with 148 additions and 237 deletions
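
For orientation, the enum after this change has roughly the following shape (a
simplified sketch reconstructed from the diff below; doc comments abbreviated):

    pub enum MemoryInitialization {
        /// Raw wasm data segments, applied one at a time at instantiation.
        /// This remains the default memory initialization type.
        Segmented(Vec<MemoryInitializer>),
        /// At most one contiguous image per defined memory, suitable for a
        /// single `memcpy` or an mmap-based copy-on-write mapping.
        Static {
            map: PrimaryMap<MemoryIndex, Option<StaticMemoryInitializer>>,
        },
    }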

@@ -144,43 +144,12 @@ pub enum MemoryInitialization {
     /// This is the default memory initialization type.
     Segmented(Vec<MemoryInitializer>),
-    /// Memory initialization is paged.
-    ///
-    /// To be paged, the following requirements must be met:
-    ///
-    /// * All data segments must reference defined memories.
-    /// * All data segments must not use a global base.
-    ///
-    /// Paged initialization is performed by copying (or mapping) entire
-    /// WebAssembly pages to each linear memory.
-    ///
-    /// The `uffd` feature makes use of this type of memory initialization
-    /// because it can instruct the kernel to back an entire WebAssembly page
-    /// from an existing set of in-memory pages.
-    ///
-    /// By processing the data segments at module compilation time, the uffd
-    /// fault handler doesn't have to do any work to point the kernel at the
-    /// right linear memory page to use.
-    Paged {
-        /// The map of defined memory index to a list of initialization pages.
-        ///
-        /// The list of page data is sparse, with each element starting with
-        /// the offset in memory where it will be placed (specified here, as
-        /// a page index, with a `u64`). Each page of initialization data is
-        /// WebAssembly page-sized (64 KiB). Pages whose offset are not
-        /// specified in this array start with 0s in memory. The `Range`
-        /// indices, like those in `MemoryInitializer`, point within a data
-        /// segment that will come as an auxiliary descriptor with other data
-        /// such as the compiled code for the wasm module.
-        map: PrimaryMap<MemoryIndex, Vec<StaticMemoryInitializer>>,
-    },
     /// Memory initialization is statically known and involves a single `memcpy`
     /// or otherwise simply making the defined data visible.
     ///
-    /// To be statically initialized the same requirements as `Paged` must be
-    /// met, namely that everything references a dfeined memory and all data
-    /// segments have a staitcally known in-bounds base (no globals).
+    /// To be statically initialized everything must reference a defined memory
+    /// and all data segments have a statically known in-bounds base (no
+    /// globals).
     ///
     /// This form of memory initialization is a more optimized version of
     /// `Segmented` where memory can be initialized with one of a few methods:
@@ -212,116 +181,12 @@ pub enum MemoryInitialization {
 }
 impl ModuleTranslation<'_> {
-    /// Attempts to convert segmented memory initialization into paged
+    /// Attempts to convert segmented memory initialization into static
     /// initialization for the module that this translation represents.
     ///
-    /// If this module's memory initialization is not compatible with paged
-    /// initialization then this won't change anything. Otherwise if it is
-    /// compatible then the `memory_initialization` field will be updated.
-    pub fn try_paged_init(&mut self) {
-        // This method only attempts to transform a a `Segmented` memory init
-        // into a `Paged` one, no other state.
-        if !self.module.memory_initialization.is_segmented() {
-            return;
-        }
-        // Initially all memories start out as all zeros, represented with a
-        // lack of entries in the `BTreeMap` here. The map indexes byte offset
-        // (which is always wasm-page-aligned) to the contents of the page, with
-        // missing entries implicitly as all zeros.
-        let mut page_contents = PrimaryMap::with_capacity(self.module.memory_plans.len());
-        for _ in 0..self.module.memory_plans.len() {
-            page_contents.push(BTreeMap::new());
-        }
-        // Perform a "dry run" of memory initialization which will fail if we
-        // can't switch to paged initialization. When data is written it's
-        // transformed into the representation of `page_contents`.
-        let mut data = self.data.iter();
-        let ok = self.module.memory_initialization.init_memory(
-            InitMemory::CompileTime(&self.module),
-            &mut |memory, init| {
-                let data = data.next().unwrap();
-                assert_eq!(data.len(), init.data.len());
-                // If an initializer references an imported memory then
-                // everything will need to be processed in-order anyway to
-                // handle the dynamic limits of the memory specified.
-                if self.module.defined_memory_index(memory).is_none() {
-                    return false;
-                };
-                let page_size = u64::from(WASM_PAGE_SIZE);
-                let contents = &mut page_contents[memory];
-                let mut page_index = init.offset / page_size;
-                let mut page_offset = (init.offset % page_size) as usize;
-                let mut data = &data[..];
-                while !data.is_empty() {
-                    // If this page hasn't been seen before, then it starts out
-                    // as all zeros.
-                    let page = contents
-                        .entry(page_index)
-                        .or_insert_with(|| vec![0; page_size as usize]);
-                    let page = &mut page[page_offset..];
-                    let len = std::cmp::min(data.len(), page.len());
-                    page[..len].copy_from_slice(&data[..len]);
-                    page_index += 1;
-                    page_offset = 0;
-                    data = &data[len..];
-                }
-                true
-            },
-        );
-        // If anything failed above or hit an unknown case then bail out
-        // entirely since this module cannot use paged initialization.
-        if !ok {
-            return;
-        }
-        // If we've gotten this far then we're switching to paged
-        // initialization. The contents of the initial wasm memory are
-        // specified by `page_contents`, so the job now is to transform data
-        // representation of wasm memory back into the representation we use
-        // in a `Module`.
-        //
-        // This is done by clearing `self.data`, the original data segments,
-        // since those are now all represented in `page_contents`. Afterwards
-        // all the pages are subsequently pushed onto `self.data` and the
-        // offsets within `self.data` are recorded in each segment that's part
-        // of `Paged`.
-        self.data.clear();
-        let mut map = PrimaryMap::with_capacity(page_contents.len());
-        let mut offset = 0;
-        for (memory, pages) in page_contents {
-            let mut page_offsets = Vec::with_capacity(pages.len());
-            for (page_index, page) in pages {
-                // Skip entirely zero pages since they don't need to participate
-                // in initialization.
-                if page.iter().all(|b| *b == 0) {
-                    continue;
-                }
-                let end = offset + (page.len() as u32);
-                page_offsets.push(StaticMemoryInitializer {
-                    offset: page_index * u64::from(WASM_PAGE_SIZE),
-                    data: offset..end,
-                });
-                offset = end;
-                self.data.push(page.into());
-            }
-            let index = map.push(page_offsets);
-            assert_eq!(index, memory);
-        }
-        self.module.memory_initialization = MemoryInitialization::Paged { map };
-    }
-    /// Similar to the `try_paged_init` method, but attempts to use the
-    /// `MemoryInitialization::Static` variant.
-    ///
-    /// Note that the constraints for `Paged` are the same as those for
-    /// `Static`.
-    ///
     /// Takes a `page_size` argument in order to ensure that all
     /// initialization is page-aligned for mmap-ability, and
@@ -347,44 +212,62 @@ impl ModuleTranslation<'_> {
     /// for now, this is sufficient to allow a system that "knows what
     /// it's doing" to always get static init.
     pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
-        // First try to switch this memory initialization to the `Paged`
-        // variant, if it isn't already. This will perform static bounds checks
-        // and everything and massage it all into a format which is a bit easier
-        // to process here.
-        self.try_paged_init();
-        let map = match &mut self.module.memory_initialization {
-            MemoryInitialization::Paged { map } => map,
-            _ => return,
-        };
+        // This method only attempts to transform a `Segmented` memory init
+        // into a `Static` one, no other state.
+        if !self.module.memory_initialization.is_segmented() {
+            return;
+        }
-        let memory_init_size = |pages: &[StaticMemoryInitializer]| {
-            if pages.len() == 0 {
-                return 0;
-            }
-            let first = &pages[0];
-            let last = &pages[pages.len() - 1];
-            last.offset - first.offset + (last.data.len() as u64)
-        };
+        // First a dry run of memory initialization is performed. This
+        // collects information about the extent of memory initialized for each
+        // memory as well as the size of all data segments being copied in.
+        struct Memory {
+            data_size: u64,
+            min_addr: u64,
+            max_addr: u64,
+            // The `usize` here is a pointer into `self.data` which is the list
+            // of data segments corresponding to what was found in the original
+            // wasm module.
+            segments: Vec<(usize, StaticMemoryInitializer)>,
+        }
+        let mut info = PrimaryMap::with_capacity(self.module.memory_plans.len());
+        for _ in 0..self.module.memory_plans.len() {
+            info.push(Memory {
+                data_size: 0,
+                min_addr: u64::MAX,
+                max_addr: 0,
+                segments: Vec::new(),
+            });
+        }
+        let mut idx = 0;
+        let ok = self.module.memory_initialization.init_memory(
+            InitMemory::CompileTime(&self.module),
+            &mut |memory, init| {
+                // Currently `Static` only applies to locally-defined memories,
+                // so if a data segment references an imported memory then
+                // transitioning to a `Static` memory initializer is not
+                // possible.
+                if self.module.defined_memory_index(memory).is_none() {
+                    return false;
+                };
+                let info = &mut info[memory];
+                let data_len = u64::from(init.data.end - init.data.start);
+                info.data_size += data_len;
+                info.min_addr = info.min_addr.min(init.offset);
+                info.max_addr = info.max_addr.max(init.offset + data_len);
+                info.segments.push((idx, init.clone()));
+                idx += 1;
+                true
+            },
+        );
+        if !ok {
+            return;
+        }
-        // Perform a check, on all memories, that the memory initialization is
-        // compatible with static memory initialization. The main concern here
-        // is that construction of the memory image shouldn't consume excessive
-        // resources here during compilation. At this point we're already using
-        // paged initialization so we're theoretically using O(data size)
-        // memory already, and we don't want to use excessively more than that
-        // during image construction. Some heuristics are applied here to see if
-        // they're compatible.
-        let mut data = self.data.as_slice();
-        for (_memory_index, pages) in map.iter() {
-            let (memory_data, rest) = data.split_at(pages.len());
-            data = rest;
-            // Calculate the total size of data used to initialized this memory
-            // (the sum of all the page sizes), and then also calculate the
-            // actual memory initialization size assuming it's initialized in
-            // one whole chunk in one whole go.
-            let data_size = memory_data.iter().map(|d| d.len()).sum::<usize>() as u64;
-            let memory_init_size = memory_init_size(pages);
+        // Validate that the memory information collected is indeed valid for
+        // static memory initialization.
+        for info in info.values().filter(|i| i.data_size > 0) {
+            let image_size = info.max_addr - info.min_addr;
             // If the range of memory being initialized is less than twice the
             // total size of the data itself then it's assumed that static
@@ -392,14 +275,14 @@ impl ModuleTranslation<'_> {
             // consumption during the memory image creation process, which is
             // currently assumed to "probably be ok" but this will likely need
             // tweaks over time.
-            if memory_init_size < data_size.saturating_mul(2) {
+            if image_size < info.data_size.saturating_mul(2) {
                 continue;
             }
             // If the memory initialization image is larger than the size of all
             // data, then we still allow memory initialization if the image will
             // be of a relatively modest size, such as 1MB here.
-            if memory_init_size < max_image_size_always_allowed {
+            if image_size < max_image_size_always_allowed {
                 continue;
             }
@@ -408,57 +291,101 @@ impl ModuleTranslation<'_> {
             // happen at runtime.
             return;
         }
-        assert!(data.is_empty());
-        // Here's where we've now committed to changing to static memory. The
-        // memory initialization image is built here from the page data and then
-        // it's converted to a single initializer.
         let data = mem::replace(&mut self.data, Vec::new());
-        let mut data = data.iter();
-        let mut image_map = PrimaryMap::with_capacity(map.len());
-        let mut offset = 0u32;
-        for (memory_index, pages) in map.iter() {
-            // Allocate the memory image and then fill it in with data. Note
-            // that `pages` should be sorted in increasing order of offsets.
-            let capacity = usize::try_from(memory_init_size(pages)).unwrap();
-            let mut image = Vec::with_capacity(capacity);
-            for page in pages {
-                let image_offset = usize::try_from(page.offset - pages[0].offset).unwrap();
-                assert!(image.len() <= image_offset);
-                image.resize(image_offset, 0u8);
-                image.extend_from_slice(data.next().unwrap());
+        let mut map = PrimaryMap::with_capacity(info.len());
+        let mut module_data_size = 0u32;
+        for (memory, info) in info.iter() {
+            // Create the in-memory `image` which is the initialized contents of
+            // this linear memory.
+            let extent = if info.segments.len() > 0 {
+                (info.max_addr - info.min_addr) as usize
+            } else {
+                0
+            };
+            let mut image = Vec::with_capacity(extent);
+            for (idx, init) in info.segments.iter() {
+                let data = &data[*idx];
+                assert_eq!(data.len(), init.data.len());
+                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
+                if image.len() < offset {
+                    image.resize(offset, 0u8);
+                    image.extend_from_slice(data);
+                } else {
+                    image.splice(
+                        offset..(offset + data.len()).min(image.len()),
+                        data.iter().copied(),
+                    );
+                }
             }
-            assert_eq!(image.len(), capacity);
-            assert_eq!(image.capacity(), capacity);
+            assert_eq!(image.len(), extent);
+            assert_eq!(image.capacity(), extent);
+            let mut offset = if info.segments.len() > 0 {
+                info.min_addr
+            } else {
+                0
+            };
-            // Convert the `image` to a single `StaticMemoryInitializer` if it's
-            // not empty.
-            let init = if image.len() > 0 {
-                let data_offset = offset;
-                let len = u32::try_from(image.len()).unwrap();
-                let data_offset_end = data_offset.checked_add(len).unwrap();
-                offset += len;
+            // Chop off trailing zeros from the image as memory is already
+            // zero-initialized. Note that `i` is the position of a nonzero
+            // entry here, so to not lose it we truncate to `i + 1`.
+            if let Some(i) = image.iter().rposition(|i| *i != 0) {
+                image.truncate(i + 1);
+            }
-                // Offset/length should always be page-aligned since our pages
-                // are always wasm pages right now which are 64k and we
-                // otherwise won't work at all on systems larger page sizes.
-                assert!(u64::from(data_offset) % page_size == 0);
-                assert!(u64::from(len) % page_size == 0);
-                self.data.push(image.into());
+            // Also chop off leading zeros, if any.
+            if let Some(i) = image.iter().position(|i| *i != 0) {
+                offset += i as u64;
+                image.drain(..i);
+            }
+            let mut len = u64::try_from(image.len()).unwrap();
+            // The goal is to enable mapping this image directly into memory, so
+            // the offset into linear memory must be a multiple of the page
+            // size. If that's not already the case then the image is padded at
+            // the front and back with extra zeros as necessary
+            if offset % page_size != 0 {
+                let zero_padding = offset % page_size;
+                self.data.push(vec![0; zero_padding as usize].into());
+                offset -= zero_padding;
+                len += zero_padding;
+            }
+            self.data.push(image.into());
+            if len % page_size != 0 {
+                let zero_padding = page_size - (len % page_size);
+                self.data.push(vec![0; zero_padding as usize].into());
+                len += zero_padding;
+            }
+            // Offset/length should now always be page-aligned.
+            assert!(offset % page_size == 0);
+            assert!(len % page_size == 0);
+            // Create the `StaticMemoryInitializer` which describes this image,
+            // only needed if the image is actually present and has a nonzero
+            // length. The `offset` has been calculated above, originally
+            // sourced from `info.min_addr`. The `data` field is the extent
+            // within the final data segment we'll emit to an ELF image, which
+            // is the concatenation of `self.data`, so here it's the size of
+            // the section-so-far plus the current segment we're appending.
+            let len = u32::try_from(len).unwrap();
+            let init = if len > 0 {
                 Some(StaticMemoryInitializer {
-                    offset: pages[0].offset,
-                    data: data_offset..data_offset_end,
+                    offset,
+                    data: module_data_size..module_data_size + len,
                 })
             } else {
                 None
             };
-            let idx = image_map.push(init);
-            assert_eq!(idx, memory_index);
+            let idx = map.push(init);
+            assert_eq!(idx, memory);
+            module_data_size += len;
         }
-        assert!(data.next().is_none());
         self.data_align = Some(page_size);
-        self.module.memory_initialization = MemoryInitialization::Static { map: image_map };
+        self.module.memory_initialization = MemoryInitialization::Static { map };
     }
/// Attempts to convert the module's table initializers to
@@ -606,26 +533,11 @@ impl MemoryInitialization {
             // initialization.
             MemoryInitialization::Segmented(list) => list,
-            // If previously switched to paged initialization then pass through
+            // If previously switched to static initialization then pass through
             // all those parameters here to the `write` callback.
             //
-            // Note that existence of `Paged` already guarantees that all
+            // Note that existence of `Static` already guarantees that all
            // indices are in-bounds.
-            MemoryInitialization::Paged { map } => {
-                for (index, pages) in map {
-                    for init in pages {
-                        debug_assert_eq!(init.data.end - init.data.start, WASM_PAGE_SIZE);
-                        let result = write(index, init);
-                        if !result {
-                            return result;
-                        }
-                    }
-                }
-                return true;
-            }
-            // Like `Paged` above everything's already been validated so this
-            // can simply forward through the data.
             MemoryInitialization::Static { map } => {
                 for (index, init) in map {
                     if let Some(init) = init {
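
The front-and-back zero padding performed by the new `try_static_init` above
can be illustrated in isolation. The helper below is a standalone sketch with
hypothetical values, not part of the diff:

    fn pad_to_page(mut offset: u64, mut len: u64, page_size: u64) -> (u64, u64) {
        // Pull `offset` back to a page boundary, growing the image at the front.
        if offset % page_size != 0 {
            let zero_padding = offset % page_size;
            offset -= zero_padding;
            len += zero_padding;
        }
        // Round `len` up to a page boundary, growing the image at the back.
        if len % page_size != 0 {
            len += page_size - (len % page_size);
        }
        (offset, len)
    }

    fn main() {
        // A 0x2800-byte image at offset 0x11234 with 4 KiB pages becomes a
        // 0x3000-byte image at offset 0x11000, both now page-aligned.
        assert_eq!(pad_to_page(0x11234, 0x2800, 0x1000), (0x11000, 0x3000));
    }

Padding with zeros rather than bailing out keeps the image mmap-able on hosts
whose page size does not divide the image's natural offset, at the cost of a
few extra zero bytes in the emitted data section.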

@@ -179,13 +179,12 @@ pub fn finish_compile(
         SectionKind::ReadOnlyData,
     );
     let mut total_data_len = 0;
-    for data in data {
-        let offset = obj.append_section_data(data_id, &data, data_align.unwrap_or(1));
-        // All data segments are expected to be adjacent to one another, and
-        // with a higher alignment each data segment needs to be individually
-        // aligned to make this so, so assert that the offset this was placed at
-        // is always against the previous segment.
-        assert_eq!(offset as usize, total_data_len);
+    for (i, data) in data.iter().enumerate() {
+        // The first data segment has its alignment specified as the alignment
+        // for the entire section, but everything afterwards is adjacent so it
+        // has alignment of 1.
+        let align = if i == 0 { data_align.unwrap_or(1) } else { 1 };
+        obj.append_section_data(data_id, data, align);
         total_data_len += data.len();
     }
     for data in passive_data.iter() {
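
The alignment change above can be sketched as follows: assuming the offset
returned by `append_section_data` is the section's running length rounded up
to the requested alignment, giving only the first segment the section
alignment keeps every later segment adjacent, so `total_data_len` stays in
sync with the real offsets (a hypothetical helper, not part of the diff):

    fn append_offsets(segment_lens: &[usize], section_align: usize) -> Vec<usize> {
        let mut offsets = Vec::new();
        let mut end = 0usize;
        for (i, len) in segment_lens.iter().enumerate() {
            // Only the first segment carries the section alignment; the rest
            // use alignment 1 and therefore land adjacent to their predecessor.
            let align = if i == 0 { section_align } else { 1 };
            let offset = (end + align - 1) / align * align;
            offsets.push(offset);
            end = offset + len;
        }
        offsets
    }

    fn main() {
        // Segments of 5, 3, and 7 bytes in a 16-byte-aligned section land at
        // offsets 0, 5, and 8: adjacent despite the section-level alignment.
        assert_eq!(append_offsets(&[5, 3, 7], 16), vec![0, 5, 8]);
    }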

@@ -396,7 +396,7 @@ fn check_init_bounds(instance: &mut Instance, module: &Module) -> Result<(), Ins
             check_memory_init_bounds(instance, initializers)?;
         }
         // Statically validated already to have everything in-bounds.
-        MemoryInitialization::Paged { .. } | MemoryInitialization::Static { .. } => {}
+        MemoryInitialization::Static { .. } => {}
     }
     Ok(())