Flush Icache on AArch64 Windows (#4997)
* cranelift: Add FlushInstructionCache for AArch64 on Windows This was previously done on #3426 for linux. * wasmtime: Add FlushInstructionCache for AArch64 on Windows This was previously done on #3426 for linux. * cranelift: Add MemoryUse flag to JIT Memory Manager This allows us to keep the icache flushing code self-contained and not leak implementation details. This also changes the windows icache flushing code to only flush pages that were previously unflushed. * Add jit-icache-coherence crate * cranelift: Use `jit-icache-coherence` * wasmtime: Use `jit-icache-coherence` * jit-icache-coherence: Make rustix feature additive Mutually exclusive features cause issues. * wasmtime: Remove rustix from wasmtime-jit We now use it via jit-icache-coherence * Rename wasmtime-jit-icache-coherency crate * Use cfg-if in wasmtime-jit-icache-coherency crate * Use inline instead of inline(always) * Add unsafe marker to clear_cache * Conditionally compile all rustix operations membarrier does not exist on MacOS * Publish `wasmtime-jit-icache-coherence` * Remove explicit windows check This is implied by the target_os = "windows" above * cranelift: Remove len != 0 check This is redundant as it is done in non_protected_allocations_iter * Comment cleanups Thanks @akirilov-arm! * Make clear_cache safe * Rename pipeline_flush to pipeline_flush_mt * Revert "Make clear_cache safe" This reverts commit 21165d81c9030ed9b291a1021a367214d2942c90. * More docs! * Fix pipeline_flush reference on clear_cache * Update more docs! * Move pipeline flush after `mprotect` calls Technically the `clear_cache` operation is a lie in AArch64, so move the pipeline flush after the `mprotect` calls so that it benefits from the implicit cache cleaning done by it. * wasmtime: Remove rustix backend from icache crate * wasmtime: Use libc for macos * wasmtime: Flush icache on all arch's for windows * wasmtime: Add flags to membarrier call
This commit is contained in:
12
Cargo.lock
generated
12
Cargo.lock
generated
@@ -667,6 +667,7 @@ dependencies = [
|
||||
"memmap2",
|
||||
"region",
|
||||
"target-lexicon",
|
||||
"wasmtime-jit-icache-coherence",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
@@ -3665,12 +3666,12 @@ dependencies = [
|
||||
"log",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
"rustix",
|
||||
"serde",
|
||||
"target-lexicon",
|
||||
"thiserror",
|
||||
"wasmtime-environ",
|
||||
"wasmtime-jit-debug",
|
||||
"wasmtime-jit-icache-coherence",
|
||||
"wasmtime-runtime",
|
||||
"windows-sys",
|
||||
]
|
||||
@@ -3684,6 +3685,15 @@ dependencies = [
|
||||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasmtime-jit-icache-coherence"
|
||||
version = "2.0.0"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasmtime-runtime"
|
||||
version = "3.0.0"
|
||||
|
||||
@@ -84,6 +84,7 @@ members = [
|
||||
"crates/c-api",
|
||||
"crates/cli-flags",
|
||||
"crates/environ/fuzz",
|
||||
"crates/jit-icache-coherence",
|
||||
"examples/fib-debug/wasm",
|
||||
"examples/wasi/wasm",
|
||||
"examples/tokio/wasm",
|
||||
@@ -126,6 +127,7 @@ wasi-common = { path = "crates/wasi-common", version = "=3.0.0" }
|
||||
wasi-tokio = { path = "crates/wasi-common/tokio", version = "=3.0.0" }
|
||||
wasi-cap-std-sync = { path = "crates/wasi-common/cap-std-sync", version = "=3.0.0" }
|
||||
wasmtime-fuzzing = { path = "crates/fuzzing" }
|
||||
wasmtime-jit-icache-coherence = { path = "crates/jit-icache-coherence", version = "=2.0.0" }
|
||||
|
||||
cranelift-wasm = { path = "cranelift/wasm", version = "0.90.0" }
|
||||
cranelift-codegen = { path = "cranelift/codegen", version = "0.90.0" }
|
||||
|
||||
@@ -20,6 +20,7 @@ libc = { version = "0.2.42" }
|
||||
target-lexicon = { workspace = true }
|
||||
memmap2 = { version = "0.2.1", optional = true }
|
||||
log = { workspace = true }
|
||||
wasmtime-jit-icache-coherence = { workspace = true }
|
||||
|
||||
[target.'cfg(windows)'.dependencies.windows-sys]
|
||||
workspace = true
|
||||
|
||||
@@ -458,14 +458,6 @@ impl JITModule {
|
||||
self.memory.readonly.set_readonly();
|
||||
self.memory.code.set_readable_and_executable();
|
||||
|
||||
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
||||
{
|
||||
let cmd: libc::c_int = 32; // MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
|
||||
|
||||
// Ensure that no processor has fetched a stale instruction stream.
|
||||
unsafe { libc::syscall(libc::SYS_membarrier, cmd) };
|
||||
}
|
||||
|
||||
for update in self.pending_got_updates.drain(..) {
|
||||
unsafe { update.entry.as_ref() }.store(update.ptr as *mut _, Ordering::SeqCst);
|
||||
}
|
||||
@@ -530,15 +522,6 @@ impl JITModule {
|
||||
module.libcall_plt_entries.insert(libcall, plt_entry);
|
||||
}
|
||||
|
||||
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
||||
{
|
||||
let cmd: libc::c_int = 64; // MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
|
||||
|
||||
// This is a requirement of the membarrier() call executed by
|
||||
// the finalize_definitions() method.
|
||||
unsafe { libc::syscall(libc::SYS_membarrier, cmd) };
|
||||
}
|
||||
|
||||
module
|
||||
}
|
||||
|
||||
|
||||
@@ -4,9 +4,11 @@ use memmap2::MmapMut;
|
||||
#[cfg(not(any(feature = "selinux-fix", windows)))]
|
||||
use std::alloc;
|
||||
use std::convert::TryFrom;
|
||||
use std::ffi::c_void;
|
||||
use std::io;
|
||||
use std::mem;
|
||||
use std::ptr;
|
||||
use wasmtime_jit_icache_coherence as icache_coherence;
|
||||
|
||||
/// A simple struct consisting of a pointer and length.
|
||||
struct PtrLen {
|
||||
@@ -161,6 +163,7 @@ impl Memory {
|
||||
// TODO: Allocate more at a time.
|
||||
self.current = PtrLen::with_size(size)?;
|
||||
self.position = size;
|
||||
|
||||
Ok(self.current.ptr)
|
||||
}
|
||||
|
||||
@@ -168,45 +171,45 @@ impl Memory {
|
||||
pub(crate) fn set_readable_and_executable(&mut self) {
|
||||
self.finish_current();
|
||||
|
||||
// Clear all the newly allocated code from cache if the processor requires it
|
||||
//
|
||||
// Do this before marking the memory as R+X, technically we should be able to do it after
|
||||
// but there are some CPUs that have had errata about doing this with read-only memory.
|
||||
for &PtrLen { ptr, len, .. } in self.non_protected_allocations_iter() {
|
||||
unsafe {
|
||||
icache_coherence::clear_cache(ptr as *const c_void, len)
|
||||
.expect("Failed cache clear")
|
||||
};
|
||||
}
|
||||
|
||||
let set_region_readable_and_executable = |ptr, len| {
|
||||
if len != 0 {
|
||||
if self.branch_protection == BranchProtection::BTI {
|
||||
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
||||
if std::arch::is_aarch64_feature_detected!("bti") {
|
||||
let prot = libc::PROT_EXEC | libc::PROT_READ | /* PROT_BTI */ 0x10;
|
||||
if self.branch_protection == BranchProtection::BTI {
|
||||
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
||||
if std::arch::is_aarch64_feature_detected!("bti") {
|
||||
let prot = libc::PROT_EXEC | libc::PROT_READ | /* PROT_BTI */ 0x10;
|
||||
|
||||
unsafe {
|
||||
if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 {
|
||||
panic!("unable to make memory readable+executable");
|
||||
}
|
||||
unsafe {
|
||||
if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 {
|
||||
panic!("unable to make memory readable+executable");
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
region::protect(ptr, len, region::Protection::READ_EXECUTE)
|
||||
.expect("unable to make memory readable+executable");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
region::protect(ptr, len, region::Protection::READ_EXECUTE)
|
||||
.expect("unable to make memory readable+executable");
|
||||
}
|
||||
};
|
||||
|
||||
#[cfg(feature = "selinux-fix")]
|
||||
{
|
||||
for &PtrLen { ref map, ptr, len } in &self.allocations[self.already_protected..] {
|
||||
if map.is_some() {
|
||||
set_region_readable_and_executable(ptr, len);
|
||||
}
|
||||
}
|
||||
for &PtrLen { ptr, len, .. } in self.non_protected_allocations_iter() {
|
||||
set_region_readable_and_executable(ptr, len);
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "selinux-fix"))]
|
||||
{
|
||||
for &PtrLen { ptr, len } in &self.allocations[self.already_protected..] {
|
||||
set_region_readable_and_executable(ptr, len);
|
||||
}
|
||||
}
|
||||
// Flush any in-flight instructions from the pipeline
|
||||
icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush");
|
||||
|
||||
self.already_protected = self.allocations.len();
|
||||
}
|
||||
@@ -215,33 +218,27 @@ impl Memory {
|
||||
pub(crate) fn set_readonly(&mut self) {
|
||||
self.finish_current();
|
||||
|
||||
#[cfg(feature = "selinux-fix")]
|
||||
{
|
||||
for &PtrLen { ref map, ptr, len } in &self.allocations[self.already_protected..] {
|
||||
if len != 0 && map.is_some() {
|
||||
unsafe {
|
||||
region::protect(ptr, len, region::Protection::READ)
|
||||
.expect("unable to make memory readonly");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "selinux-fix"))]
|
||||
{
|
||||
for &PtrLen { ptr, len } in &self.allocations[self.already_protected..] {
|
||||
if len != 0 {
|
||||
unsafe {
|
||||
region::protect(ptr, len, region::Protection::READ)
|
||||
.expect("unable to make memory readonly");
|
||||
}
|
||||
}
|
||||
for &PtrLen { ptr, len, .. } in self.non_protected_allocations_iter() {
|
||||
unsafe {
|
||||
region::protect(ptr, len, region::Protection::READ)
|
||||
.expect("unable to make memory readonly");
|
||||
}
|
||||
}
|
||||
|
||||
self.already_protected = self.allocations.len();
|
||||
}
|
||||
|
||||
/// Iterates non protected memory allocations that are of not zero bytes in size.
|
||||
fn non_protected_allocations_iter(&self) -> impl Iterator<Item = &PtrLen> {
|
||||
let iter = self.allocations[self.already_protected..].iter();
|
||||
|
||||
#[cfg(feature = "selinux-fix")]
|
||||
return iter.filter(|&PtrLen { ref map, len, .. }| len != 0 && map.is_some());
|
||||
|
||||
#[cfg(not(feature = "selinux-fix"))]
|
||||
return iter.filter(|&PtrLen { len, .. }| *len != 0);
|
||||
}
|
||||
|
||||
/// Frees all allocated memory regions that would be leaked otherwise.
|
||||
/// Likely to invalidate existing function pointers, causing unsafety.
|
||||
pub(crate) unsafe fn free_memory(&mut self) {
|
||||
|
||||
23
crates/jit-icache-coherence/Cargo.toml
Normal file
23
crates/jit-icache-coherence/Cargo.toml
Normal file
@@ -0,0 +1,23 @@
|
||||
[package]
|
||||
name = "wasmtime-jit-icache-coherence"
|
||||
version = "2.0.0"
|
||||
authors.workspace = true
|
||||
description = "Utilities for JIT icache maintenance"
|
||||
documentation = "https://docs.rs/jit-icache-coherence"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
repository = "https://github.com/bytecodealliance/wasmtime"
|
||||
edition.workspace = true
|
||||
|
||||
[dependencies]
|
||||
cfg-if = "1.0"
|
||||
|
||||
[target.'cfg(target_os = "windows")'.dependencies.windows-sys]
|
||||
workspace = true
|
||||
features = [
|
||||
"Win32_Foundation",
|
||||
"Win32_System_Threading",
|
||||
"Win32_System_Diagnostics_Debug",
|
||||
]
|
||||
|
||||
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies.libc]
|
||||
version = "0.2.42"
|
||||
105
crates/jit-icache-coherence/src/lib.rs
Normal file
105
crates/jit-icache-coherence/src/lib.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
//! This crate provides utilities for instruction cache maintenance for JIT authors.
|
||||
//!
|
||||
//! In self-modifying code, such as when writing a JIT, special care must be taken when marking the
|
||||
//! code as ready for execution. On fully coherent architectures (X86, S390X) the data cache (D-Cache)
|
||||
//! and the instruction cache (I-Cache) are always in sync. However this is not guaranteed for all
|
||||
//! architectures such as AArch64 where these caches are not coherent with each other.
|
||||
//!
|
||||
//! When writing new code there may be an I-cache entry for that same address which causes the
|
||||
//! processor to execute whatever was in the cache instead of the new code.
|
||||
//!
|
||||
//! See the [ARM Community - Caches and Self-Modifying Code] blog post that contains a great
|
||||
//! explanation of the above. (It references AArch32 but it has a high level overview of this problem).
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
//! You should call [clear_cache] on any pages that you write with the new code that you're intending
|
||||
//! to execute. You can do this at any point in the code from the moment that you write the page up to
|
||||
//! the moment where the code is executed.
|
||||
//!
|
||||
//! You also need to call [pipeline_flush_mt] to ensure that there isn't any invalid instruction currently
|
||||
//! in the pipeline if you are running in a multi threaded environment.
|
||||
//!
|
||||
//! For single threaded programs you are free to omit [pipeline_flush_mt], otherwise you need to
|
||||
//! call both [clear_cache] and [pipeline_flush_mt] in that order.
|
||||
//!
|
||||
//! ### Example:
|
||||
//! ```
|
||||
//! # use std::ffi::c_void;
|
||||
//! # use std::io;
|
||||
//! # use wasmtime_jit_icache_coherence::*;
|
||||
//! #
|
||||
//! # struct Page {
|
||||
//! # addr: *const c_void,
|
||||
//! # len: usize,
|
||||
//! # }
|
||||
//! #
|
||||
//! # fn main() -> io::Result<()> {
|
||||
//! #
|
||||
//! # let run_code = || {};
|
||||
//! # let code = vec![0u8; 64];
|
||||
//! # let newly_written_pages = vec![Page {
|
||||
//! # addr: &code[0] as *const u8 as *const c_void,
|
||||
//! # len: code.len(),
|
||||
//! # }];
|
||||
//! # unsafe {
|
||||
//! // Invalidate the cache for all the newly written pages where we wrote our new code.
|
||||
//! for page in newly_written_pages {
|
||||
//! clear_cache(page.addr, page.len)?;
|
||||
//! }
|
||||
//!
|
||||
//! // Once those are invalidated we also need to flush the pipeline
|
||||
//! pipeline_flush_mt()?;
|
||||
//!
|
||||
//! // We can now safely execute our new code.
|
||||
//! run_code();
|
||||
//! # }
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! <div class="example-wrap" style="display:inline-block"><pre class="compile_fail" style="white-space:normal;font:inherit;">
|
||||
//!
|
||||
//! **Warning**: In order to correctly use this interface you should always call [clear_cache].
|
||||
//! A followup call to [pipeline_flush_mt] is required if you are running in a multi-threaded environment.
|
||||
//!
|
||||
//! </pre></div>
|
||||
//!
|
||||
//! [ARM Community - Caches and Self-Modifying Code]: https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/caches-and-self-modifying-code
|
||||
|
||||
use std::ffi::c_void;
|
||||
use std::io::Result;
|
||||
|
||||
// Pick the platform backend and expose it under the common name `imp`:
// every target other than Windows uses the `libc` module, Windows uses `win`.
cfg_if::cfg_if! {
    if #[cfg(not(target_os = "windows"))] {
        mod libc;
        use crate::libc as imp;
    } else {
        mod win;
        use win as imp;
    }
}
|
||||
|
||||
/// Flushes instructions in the processor pipeline
|
||||
///
|
||||
/// This pipeline flush is broadcast to all processors that are executing threads in the current process.
|
||||
///
|
||||
/// Calling [pipeline_flush_mt] is only required for multi-threaded programs and it *must* be called
|
||||
/// after all calls to [clear_cache].
|
||||
///
|
||||
/// If the architecture does not require a pipeline flush, this function does nothing.
|
||||
pub fn pipeline_flush_mt() -> Result<()> {
|
||||
imp::pipeline_flush_mt()
|
||||
}
|
||||
|
||||
/// Flushes the instruction cache for a region of memory.
|
||||
///
|
||||
/// If the architecture does not require an instruction cache flush, this function does nothing.
|
||||
///
|
||||
/// # Unsafe
|
||||
///
|
||||
/// It is necessary to call [pipeline_flush_mt] after this function if you are running in a multi-threaded
|
||||
/// environment.
|
||||
pub unsafe fn clear_cache(ptr: *const c_void, len: usize) -> Result<()> {
|
||||
imp::clear_cache(ptr, len)
|
||||
}
|
||||
88
crates/jit-icache-coherence/src/libc.rs
Normal file
88
crates/jit-icache-coherence/src/libc.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
#![allow(unused)]
|
||||
|
||||
use libc::{syscall, EINVAL, EPERM};
|
||||
use std::ffi::c_void;
|
||||
use std::io::{Error, Result};
|
||||
|
||||
const MEMBARRIER_CMD_GLOBAL: libc::c_int = 1;
|
||||
const MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE: libc::c_int = 32;
|
||||
const MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE: libc::c_int = 64;
|
||||
|
||||
/// See docs on [crate::pipeline_flush_mt] for a description of what this function is trying to do.
///
/// Only AArch64 Linux issues a barrier here; on every other target built from this file the
/// function returns `Ok(())` without doing anything.
///
/// # Errors
///
/// On AArch64 Linux, returns the OS error of a failed `membarrier` call that is neither the
/// `EPERM` nor the `EINVAL` case handled below, or the error of a failed retry/fallback call.
#[inline]
pub(crate) fn pipeline_flush_mt() -> Result<()> {
    // Ensure that no processor has fetched a stale instruction stream.
    //
    // On AArch64 we try to do this by executing a "broadcast" `ISB` which is not something that the
    // architecture provides us but we can emulate it using the membarrier kernel interface.
    //
    // This behaviour was documented in a patch, however it seems that it hasn't been upstreamed yet.
    // Nevertheless it clearly explains the guarantees that the Linux kernel provides us regarding the
    // membarrier interface, and how to use it for JIT contexts.
    // https://lkml.kernel.org/lkml/07a8b963002cb955b7516e61bad19514a3acaa82.1623813516.git.luto@kernel.org/
    //
    // I couldn't find the follow up for that patch but there doesn't seem to be disagreement about
    // that specific part in the replies.
    // TODO: Check if the kernel has updated the membarrier documentation
    //
    // See the following issues for more info:
    //  * https://github.com/bytecodealliance/wasmtime/pull/3426
    //  * https://github.com/bytecodealliance/wasmtime/pull/4997
    //
    // TODO: x86 and s390x have coherent caches so they don't need this, but RISCV does not
    // guarantee that, so we may need to do something similar for it. However as noted in the above
    // kernel patch the SYNC_CORE membarrier has different guarantees on each architecture
    // so we need to follow up and check what it provides us.
    // See: https://github.com/bytecodealliance/wasmtime/issues/5033
    #[cfg(all(target_arch = "aarch64", target_os = "linux"))]
    match membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
        Ok(()) => {}

        // EPERM happens if the calling process hasn't yet called the register membarrier.
        // We can call the register membarrier now, and then retry the actual membarrier.
        //
        // This does have some overhead since on the first time we call this function we
        // actually execute three membarriers, but this only happens once per process and only
        // one slow membarrier is actually executed (the last one, which actually generates an IPI).
        //
        // Comparing against `Some(..)` keeps this arm panic-free even for an error value that
        // carries no OS error code.
        Err(e) if e.raw_os_error() == Some(EPERM) => {
            membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)?;
            membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE)?;
        }

        // On kernels older than 4.16 the above syscall does not exist, so we can
        // fall back to MEMBARRIER_CMD_GLOBAL which is an alias for MEMBARRIER_CMD_SHARED
        // that has existed since 4.3. GLOBAL is a lot slower, but allows us to have
        // compatibility with older kernels.
        Err(e) if e.raw_os_error() == Some(EINVAL) => {
            membarrier(MEMBARRIER_CMD_GLOBAL)?;
        }

        // In any other case we got an actual error, so let's propagate that up.
        Err(e) => return Err(e),
    }

    Ok(())
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn membarrier(barrier: libc::c_int) -> Result<()> {
|
||||
let flags: libc::c_int = 0;
|
||||
let res = unsafe { syscall(libc::SYS_membarrier, barrier, flags) };
|
||||
if res == 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::last_os_error())
|
||||
}
|
||||
}
|
||||
|
||||
/// See docs on [crate::clear_cache] for a description of what this function is trying to do.
///
/// This backend currently ignores both arguments, performs no work and always returns `Ok(())`.
#[inline]
pub(crate) fn clear_cache(_ptr: *const c_void, _len: usize) -> Result<()> {
    // TODO: On AArch64 the `mprotect` call that switches the memory from W+R to R+X is what
    // currently does this job for us. That is an implementation detail which should not be
    // relied upon; some real implementation of `clear_cache` belongs here instead.
    //
    // See: https://github.com/bytecodealliance/wasmtime/issues/3310
    Ok(())
}
|
||||
45
crates/jit-icache-coherence/src/win.rs
Normal file
45
crates/jit-icache-coherence/src/win.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use std::ffi::c_void;
|
||||
use std::io::{Error, Result};
|
||||
use windows_sys::Win32::System::Diagnostics::Debug::FlushInstructionCache;
|
||||
use windows_sys::Win32::System::Threading::FlushProcessWriteBuffers;
|
||||
use windows_sys::Win32::System::Threading::GetCurrentProcess;
|
||||
|
||||
/// See docs on [crate::pipeline_flush_mt] for a description of what this function is trying to do.
|
||||
#[inline]
|
||||
pub(crate) fn pipeline_flush_mt() -> Result<()> {
|
||||
// If we are here, it means that the user has already called [cache_clear] for all buffers that
|
||||
// are going to be holding code. We don't really care about flushing the write buffers, but
|
||||
// the other guarantee that microsoft provides on this API. As documented:
|
||||
//
|
||||
// "The function generates an interprocessor interrupt (IPI) to all processors that are part of
|
||||
// the current process affinity. It guarantees the visibility of write operations performed on
|
||||
// one processor to the other processors."
|
||||
//
|
||||
// This all-core IPI acts as a core serializing operation, equivalent to a "broadcast" `ISB`
|
||||
// instruction that the architecture does not provide and which is what we really want.
|
||||
//
|
||||
// See: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-flushprocesswritebuffers
|
||||
if cfg!(target_arch = "aarch64") {
|
||||
unsafe {
|
||||
FlushProcessWriteBuffers();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// See docs on [crate::clear_cache] for a description of what this function is trying to do.
|
||||
#[inline]
|
||||
pub(crate) fn clear_cache(ptr: *const c_void, len: usize) -> Result<()> {
|
||||
// See:
|
||||
// * https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-flushinstructioncache
|
||||
// * https://devblogs.microsoft.com/oldnewthing/20190902-00/?p=102828
|
||||
unsafe {
|
||||
let res = FlushInstructionCache(GetCurrentProcess(), ptr, len);
|
||||
if res == 0 {
|
||||
return Err(Error::last_os_error());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -26,6 +26,7 @@ bincode = "1.2.1"
|
||||
rustc-demangle = "0.1.16"
|
||||
cpp_demangle = "0.3.2"
|
||||
log = { workspace = true }
|
||||
wasmtime-jit-icache-coherence = { workspace = true }
|
||||
|
||||
[target.'cfg(target_os = "windows")'.dependencies.windows-sys]
|
||||
workspace = true
|
||||
@@ -33,9 +34,6 @@ features = [
|
||||
"Win32_System_Diagnostics_Debug",
|
||||
]
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
rustix = { workspace = true, features = ["process"] }
|
||||
|
||||
[target.'cfg(target_arch = "x86_64")'.dependencies]
|
||||
ittapi = { version = "0.3.0", optional = true }
|
||||
|
||||
|
||||
@@ -3,7 +3,9 @@
|
||||
use crate::unwind::UnwindRegistration;
|
||||
use anyhow::{bail, Context, Result};
|
||||
use object::read::{File, Object, ObjectSection};
|
||||
use std::ffi::c_void;
|
||||
use std::mem::ManuallyDrop;
|
||||
use wasmtime_jit_icache_coherence as icache_coherence;
|
||||
use wasmtime_runtime::MmapVec;
|
||||
|
||||
/// Management of executable memory within a `MmapVec`
|
||||
@@ -54,15 +56,6 @@ impl CodeMemory {
|
||||
/// The returned `CodeMemory` manages the internal `MmapVec` and the
|
||||
/// `publish` method is used to actually make the memory executable.
|
||||
pub fn new(mmap: MmapVec) -> Self {
|
||||
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
||||
{
|
||||
// This is a requirement of the `membarrier` call executed by the `publish` method.
|
||||
rustix::process::membarrier(
|
||||
rustix::process::MembarrierCommand::RegisterPrivateExpeditedSyncCore,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
Self {
|
||||
mmap: ManuallyDrop::new(mmap),
|
||||
unwind_registration: ManuallyDrop::new(None),
|
||||
@@ -155,6 +148,13 @@ impl CodeMemory {
|
||||
// must be added here, though, if relocations pop up.
|
||||
assert!(text.relocations().count() == 0);
|
||||
|
||||
// Clear the newly allocated code from cache if the processor requires it
|
||||
//
|
||||
// Do this before marking the memory as R+X, technically we should be able to do it after
|
||||
// but there are some CPUs that have had errata about doing this with read-only memory.
|
||||
icache_coherence::clear_cache(ret.text.as_ptr() as *const c_void, ret.text.len())
|
||||
.expect("Failed cache clear");
|
||||
|
||||
// Switch the executable portion from read/write to
|
||||
// read/execute, notably not using read/write/execute to prevent
|
||||
// modifications.
|
||||
@@ -162,14 +162,8 @@ impl CodeMemory {
|
||||
.make_executable(text_range.clone(), enable_branch_protection)
|
||||
.expect("unable to make memory executable");
|
||||
|
||||
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
||||
{
|
||||
// Ensure that no processor has fetched a stale instruction stream.
|
||||
rustix::process::membarrier(
|
||||
rustix::process::MembarrierCommand::PrivateExpeditedSyncCore,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
// Flush any in-flight instructions from the pipeline
|
||||
icache_coherence::pipeline_flush_mt().expect("Failed pipeline flush");
|
||||
|
||||
// With all our memory set up use the platform-specific
|
||||
// `UnwindRegistration` implementation to inform the general
|
||||
|
||||
@@ -36,6 +36,7 @@ const CRATES_TO_PUBLISH: &[&str] = &[
|
||||
"cranelift-object",
|
||||
"cranelift-interpreter",
|
||||
"cranelift",
|
||||
"wasmtime-jit-icache-coherence",
|
||||
"cranelift-jit",
|
||||
// wiggle
|
||||
"wiggle-generate",
|
||||
|
||||
Reference in New Issue
Block a user