Lazily allocate the bump-alloc chunk in the externref table (#3739)
This commit updates the construction of a `VMExternRefActivationsTable` structure to perform zero malloc memory allocations. Previously it would allocate a page-sized `chunk` plus some space in hash sets for future insertions. The main trick implemented here is that the fast bump-allocation chunk is now lazily allocated and configured on the slow path of the first gc.

The motivation for this PR is that, given our recent work to further refine and optimize the instantiation process, this allocation started to show up in a nontrivial fashion. Most modules today never touch this table anyway, since almost none of them use reference types, so the time spent allocating and deallocating the table per-store was largely wasted.

Concretely, on a microbenchmark this PR speeds up instantiation of a module with one function by 30%, decreasing the instantiation cost from 1.8us to 1.2us. Overall a pretty minor win, but when the instantiation times we're measuring are in the single-digit microseconds, this win ends up getting magnified!
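The pattern is simple enough to sketch in isolation. Below is a minimal, self-contained Rust sketch of the idea rather than Wasmtime's actual implementation: the `BumpChunk` type, the `try_insert`/`ensure_chunk` names, and the plain `usize` payload are hypothetical stand-ins (the real table stores `Option<VMExternRef>` slots, and the lazy allocation happens inside gc):

use std::ptr::NonNull;

// Placeholder for Wasmtime's `TableElem`; the real table stores an
// `Option<VMExternRef>` in each slot.
type TableElem = Option<usize>;

// Hypothetical stand-in for the bump-allocation half of
// `VMExternRefActivationsTable`.
struct BumpChunk {
    next: NonNull<TableElem>,
    end: NonNull<TableElem>,
    chunk: Box<[TableElem]>,
}

impl BumpChunk {
    const CHUNK_SIZE: usize = 4096 / std::mem::size_of::<TableElem>();

    fn new_chunk(size: usize) -> Box<[TableElem]> {
        vec![None; size].into_boxed_slice()
    }

    // Construction never calls malloc: the chunk is a zero-length boxed
    // slice, so `next == end` from the start and every insertion attempt
    // is diverted to the slow path.
    fn new() -> Self {
        let mut chunk: Box<[TableElem]> = Box::new([]);
        let next = chunk.as_mut_ptr();
        let end = unsafe { next.add(chunk.len()) };
        BumpChunk {
            next: NonNull::new(next).unwrap(),
            end: NonNull::new(end).unwrap(),
            chunk,
        }
    }

    // Fast path: bump the `next` finger, or hand the value back when the
    // chunk is full (or was never allocated), forcing the slow path.
    fn try_insert(&mut self, value: usize) -> Result<(), usize> {
        if self.next == self.end {
            return Err(value);
        }
        unsafe {
            *self.next.as_ptr() = Some(value);
            self.next = NonNull::new_unchecked(self.next.as_ptr().add(1));
        }
        Ok(())
    }

    // Slow path (part of gc in Wasmtime): lazily allocate the real chunk
    // the first time it is actually needed.
    fn ensure_chunk(&mut self) {
        if self.chunk.is_empty() {
            self.chunk = Self::new_chunk(Self::CHUNK_SIZE);
            let start = self.chunk.as_mut_ptr();
            self.next = NonNull::new(start).unwrap();
            self.end = NonNull::new(unsafe { start.add(self.chunk.len()) }).unwrap();
        }
    }
}

fn main() {
    let mut table = BumpChunk::new();
    // The empty initial chunk rejects the insert, modeling the slow path.
    assert!(table.try_insert(42).is_err());
    // The slow path allocates the chunk once; fast bumps work thereafter.
    table.ensure_chunk();
    assert!(table.try_insert(42).is_ok());
}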
@@ -566,8 +566,13 @@ impl VMExternRefActivationsTable {
     /// Create a new `VMExternRefActivationsTable`.
     pub fn new() -> Self {
-        let mut chunk = Self::new_chunk(Self::CHUNK_SIZE);
-        let next = chunk.as_mut_ptr().cast::<TableElem>();
+        // Start with an empty chunk in case this activations table isn't used.
+        // This means that there's no space in the bump-allocation area which
+        // will force any path trying to use this to the slow gc path. The first
+        // time this happens, though, the slow gc path will allocate a new chunk
+        // for actual fast-bumping.
+        let mut chunk: Box<[TableElem]> = Box::new([]);
+        let next = chunk.as_mut_ptr();
         let end = unsafe { next.add(chunk.len()) };
 
         VMExternRefActivationsTable {
@@ -576,8 +581,8 @@ impl VMExternRefActivationsTable {
                 end: NonNull::new(end).unwrap(),
                 chunk,
             },
-            over_approximated_stack_roots: HashSet::with_capacity(Self::CHUNK_SIZE),
-            precise_stack_roots: HashSet::with_capacity(Self::CHUNK_SIZE),
+            over_approximated_stack_roots: HashSet::new(),
+            precise_stack_roots: HashSet::new(),
             stack_canary: None,
             #[cfg(debug_assertions)]
             gc_okay: true,
@@ -728,9 +733,18 @@ impl VMExternRefActivationsTable {
             "after sweeping the bump chunk, all slots should be `None`"
         );
 
+        // If this is the first instance of gc then the initial chunk is empty,
+        // so we lazily allocate space for fast bump-allocation in the future.
+        if self.alloc.chunk.is_empty() {
+            self.alloc.chunk = Self::new_chunk(Self::CHUNK_SIZE);
+            self.alloc.end =
+                NonNull::new(unsafe { self.alloc.chunk.as_mut_ptr().add(self.alloc.chunk.len()) })
+                    .unwrap();
+        }
+
         // Reset our `next` finger to the start of the bump allocation chunk.
         unsafe {
-            let next = self.alloc.chunk.as_mut_ptr().cast::<TableElem>();
+            let next = self.alloc.chunk.as_mut_ptr();
             debug_assert!(!next.is_null());
             *self.alloc.next.get() = NonNull::new_unchecked(next);
         }
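Two details make the "zero mallocs" claim hold in the new constructor. `Box::new([])` boxes a zero-sized array, and zero-sized allocations in Rust never touch the allocator (the box holds a dangling, well-aligned pointer). Likewise, swapping `HashSet::with_capacity(Self::CHUNK_SIZE)` for `HashSet::new()` defers the hash sets' backing allocations, since the standard library's hash collections don't allocate until the first insertion.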