Optimize some functions in the wiggle crate (#5566)

* wiggle: Inline some trivial functions

This commit marks a number of functions in wiggle as `#[inline]` because
they're trivial, mostly just returning constants. This comes out of
some work I looked at recently with Andrew, where some of these functions
showed up in profiles when they shouldn't have.

* wiggle: Optimize the `GuestMemory` for shared memory

This commit implements a minor optimization to the `GuestMemory`
implementation for Wasmtime that skips most of the borrow-checker calls
when a shared memory is in play. Shared memories never get borrowed, so
a quick check of the shared flag lets the implementation bypass the more
expensive borrow-checker methods.

* wiggle: Optimize `GuestPtr::to_vec`

This commit replaces the safe implementation of `GuestPtr::to_vec` with
an unsafe implementation. The purpose of this is to speed up the
function when it is used with shared memory, where the safe
implementation otherwise performs a bunch of atomic reads for types like
`u8`, validating each element individually, and isn't vectorizable. On a
benchmark I was helping Andrew with, this sped up the host code to the
point that guest code dominated the execution time.

* Fix build
This commit is contained in:
Alex Crichton
2023-01-12 15:49:56 -06:00
committed by GitHub
parent d3e6b7bd2a
commit cbeec5ddb9
7 changed files with 68 additions and 7 deletions

View File

@@ -61,7 +61,9 @@ pub unsafe trait GuestTypeTransparent<'a>: GuestType<'a> {}
macro_rules! integer_primitives {
($([$ty:ident, $ty_atomic:ident],)*) => ($(
impl<'a> GuestType<'a> for $ty {
#[inline]
fn guest_size() -> u32 { mem::size_of::<Self>() as u32 }
#[inline]
fn guest_align() -> usize { mem::align_of::<Self>() }
#[inline]
@@ -122,7 +124,9 @@ macro_rules! integer_primitives {
macro_rules! float_primitives {
($([$ty:ident, $ty_unsigned:ident, $ty_atomic:ident],)*) => ($(
impl<'a> GuestType<'a> for $ty {
#[inline]
fn guest_size() -> u32 { mem::size_of::<Self>() as u32 }
#[inline]
fn guest_align() -> usize { mem::align_of::<Self>() }
#[inline]
@@ -183,10 +187,12 @@ float_primitives! {
// Support pointers-to-pointers where pointers are always 32-bits in wasm land
impl<'a, T> GuestType<'a> for GuestPtr<'a, T> {
/// Guest-side size of a `GuestPtr<T>`.
///
/// Delegates to `u32::guest_size()`, since a wasm pointer is represented
/// as a 32-bit value.
#[inline]
fn guest_size() -> u32 {
u32::guest_size()
}
/// Guest-side alignment of a `GuestPtr<T>`.
///
/// Delegates to `u32::guest_align()`, matching the 32-bit representation
/// used for wasm pointers.
#[inline]
fn guest_align() -> usize {
u32::guest_align()
}
@@ -206,10 +212,12 @@ impl<'a, T> GuestType<'a> for GuestPtr<'a, [T]>
where
T: GuestType<'a>,
{
/// Guest-side size of a `GuestPtr<[T]>`: twice `u32::guest_size()`.
// NOTE(review): presumably one u32 for the base pointer and one for the
// element count -- confirm against the read/write implementation.
#[inline]
fn guest_size() -> u32 {
u32::guest_size() * 2
}
/// Guest-side alignment of a `GuestPtr<[T]>`.
///
/// Same as `u32::guest_align()`; the doubled size does not change the
/// alignment reported here.
#[inline]
fn guest_align() -> usize {
u32::guest_align()
}

View File

@@ -610,10 +610,22 @@ impl<'a, T> GuestPtr<'a, [T]> {
T: GuestTypeTransparent<'a> + Copy + 'a,
{
let guest_slice = self.as_unsafe_slice_mut()?;
let mut vec = Vec::with_capacity(guest_slice.ptr.len());
for offs in 0..guest_slice.ptr.len() {
let elem = self.get(offs as u32).expect("already validated the size");
vec.push(elem.read()?);
let len = guest_slice.ptr.len();
let mut vec = Vec::with_capacity(len);
// SAFETY: The `guest_slice` variable is already a valid pointer into
// the guest's memory, and it may or may not be a pointer into shared
// memory. We can't naively use `.to_vec(..)` which could introduce data
// races but all that needs to happen is to copy data into our local
// `vec` as all the data is `Copy` and transparent anyway. For this
// purpose the `ptr::copy` function should be sufficient for copying
// over all the data.
//
// TODO: audit that this use of `std::ptr::copy` is safe with shared
// memory (https://github.com/bytecodealliance/wasmtime/issues/4203)
unsafe {
std::ptr::copy(guest_slice.ptr.as_ptr().cast::<T>(), vec.as_mut_ptr(), len);
vec.set_len(len);
}
Ok(vec)
}

View File

@@ -51,30 +51,50 @@ impl<'a> WasmtimeGuestMemory<'a> {
}
unsafe impl GuestMemory for WasmtimeGuestMemory<'_> {
/// Returns the memory region held in `self.mem`, as a slice of
/// `UnsafeCell<u8>`.
#[inline]
fn base(&self) -> &[UnsafeCell<u8>] {
self.mem
}
// Note that this implementation has special cases for shared memory
// specifically because no regions of a shared memory can ever be borrowed.
// In the shared memory cases `shared_borrow` and `mut_borrow` are never
// called so that can be used to optimize the other methods by quickly
// checking a flag before calling the more expensive borrow-checker methods.
#[inline]
fn has_outstanding_borrows(&self) -> bool {
self.bc.has_outstanding_borrows()
!self.shared && self.bc.has_outstanding_borrows()
}
#[inline]
fn is_shared_borrowed(&self, r: Region) -> bool {
self.bc.is_shared_borrowed(r)
!self.shared && self.bc.is_shared_borrowed(r)
}
#[inline]
fn is_mut_borrowed(&self, r: Region) -> bool {
self.bc.is_mut_borrowed(r)
!self.shared && self.bc.is_mut_borrowed(r)
}
/// Registers a shared borrow of region `r` by forwarding to the borrow
/// checker in `self.bc`.
///
/// Regions of a shared memory are never borrowed, so this is not expected
/// to be called when `self.shared` is set; debug builds assert that.
#[inline]
fn shared_borrow(&self, r: Region) -> Result<BorrowHandle, GuestError> {
debug_assert!(!self.shared);
self.bc.shared_borrow(r)
}
/// Registers a mutable borrow of region `r` by forwarding to the borrow
/// checker in `self.bc`.
///
/// Regions of a shared memory are never borrowed, so this is not expected
/// to be called when `self.shared` is set; debug builds assert that.
#[inline]
fn mut_borrow(&self, r: Region) -> Result<BorrowHandle, GuestError> {
debug_assert!(!self.shared);
self.bc.mut_borrow(r)
}
/// Releases the shared borrow identified by `h` by forwarding to the
/// borrow checker in `self.bc`.
///
/// Debug builds assert that this memory is not shared.
#[inline]
fn shared_unborrow(&self, h: BorrowHandle) {
debug_assert!(!self.shared);
self.bc.shared_unborrow(h)
}
/// Releases the mutable borrow identified by `h` by forwarding to the
/// borrow checker in `self.bc`.
///
/// Debug builds assert that this memory is not shared.
#[inline]
fn mut_unborrow(&self, h: BorrowHandle) {
debug_assert!(!self.shared);
self.bc.mut_unborrow(h)
}
/// Reports whether this guest memory is a shared memory, as recorded in
/// the `shared` flag.
#[inline]
fn is_shared_memory(&self) -> bool {
self.shared
}