Flush icache on android aarch64 too (#5331)
This commit is contained in:
@@ -19,5 +19,5 @@ features = [
|
|||||||
"Win32_System_Diagnostics_Debug",
|
"Win32_System_Diagnostics_Debug",
|
||||||
]
|
]
|
||||||
|
|
||||||
[target.'cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))'.dependencies.libc]
|
[target.'cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd", target_os = "android"))'.dependencies.libc]
|
||||||
version = "0.2.42"
|
version = "0.2.42"
|
||||||
|
|||||||
@@ -1,86 +1,106 @@
|
|||||||
#![allow(unused)]
|
|
||||||
|
|
||||||
use libc::{syscall, EINVAL, EPERM};
|
|
||||||
use std::ffi::c_void;
|
use std::ffi::c_void;
|
||||||
use std::io::{Error, Result};
|
use std::io::Result;
|
||||||
|
|
||||||
const MEMBARRIER_CMD_GLOBAL: libc::c_int = 1;
|
#[cfg(all(
|
||||||
const MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE: libc::c_int = 32;
|
target_arch = "aarch64",
|
||||||
const MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE: libc::c_int = 64;
|
any(target_os = "linux", target_os = "android")
|
||||||
|
))]
|
||||||
|
mod details {
|
||||||
|
use super::*;
|
||||||
|
use libc::{syscall, EINVAL, EPERM};
|
||||||
|
use std::io::Error;
|
||||||
|
|
||||||
/// See docs on [crate::pipeline_flush_mt] for a description of what this function is trying to do.
|
const MEMBARRIER_CMD_GLOBAL: libc::c_int = 1;
|
||||||
#[inline]
|
const MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE: libc::c_int = 32;
|
||||||
pub(crate) fn pipeline_flush_mt() -> Result<()> {
|
const MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE: libc::c_int = 64;
|
||||||
// Ensure that no processor has fetched a stale instruction stream.
|
|
||||||
//
|
|
||||||
// On AArch64 we try to do this by executing a "broadcast" `ISB` which is not something that the
|
|
||||||
// architecture provides us but we can emulate it using the membarrier kernel interface.
|
|
||||||
//
|
|
||||||
// This behaviour was documented in a patch, however it seems that it hasn't been upstreamed yet
|
|
||||||
// Nevertheless it clearly explains the guarantees that the Linux kernel provides us regarding the
|
|
||||||
// membarrier interface, and how to use it for JIT contexts.
|
|
||||||
// https://lkml.kernel.org/lkml/07a8b963002cb955b7516e61bad19514a3acaa82.1623813516.git.luto@kernel.org/
|
|
||||||
//
|
|
||||||
// I couldn't find the follow up for that patch but there doesn't seem to be disagreement about
|
|
||||||
// that specific part in the replies.
|
|
||||||
// TODO: Check if the kernel has updated the membarrier documentation
|
|
||||||
//
|
|
||||||
// See the following issues for more info:
|
|
||||||
// * https://github.com/bytecodealliance/wasmtime/pull/3426
|
|
||||||
// * https://github.com/bytecodealliance/wasmtime/pull/4997
|
|
||||||
//
|
|
||||||
// TODO: x86 and s390x have coherent caches so they don't need this, but RISCV does not
|
|
||||||
// guarantee that, so we may need to do something similar for it. However as noted in the above
|
|
||||||
// kernel patch the SYNC_CORE membarrier has different guarantees on each architecture
|
|
||||||
// so we need follow up and check what it provides us.
|
|
||||||
// See: https://github.com/bytecodealliance/wasmtime/issues/5033
|
|
||||||
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
|
||||||
match membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
|
|
||||||
Ok(_) => {}
|
|
||||||
|
|
||||||
// EPERM happens if the calling process hasn't yet called the register membarrier.
|
/// See docs on [crate::pipeline_flush_mt] for a description of what this function is trying to do.
|
||||||
// We can call the register membarrier now, and then retry the actual membarrier,
|
#[inline]
|
||||||
|
pub(crate) fn pipeline_flush_mt() -> Result<()> {
|
||||||
|
// Ensure that no processor has fetched a stale instruction stream.
|
||||||
//
|
//
|
||||||
// This does have some overhead since on the first time we call this function we
|
// On AArch64 we try to do this by executing a "broadcast" `ISB` which is not something
|
||||||
// actually execute three membarriers, but this only happens once per process and only
|
// that the architecture provides us but we can emulate it using the membarrier kernel
|
||||||
// one slow membarrier is actually executed (The last one, which actually generates an IPI).
|
// interface.
|
||||||
Err(e) if e.raw_os_error().unwrap() == EPERM => {
|
//
|
||||||
membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)?;
|
// This behaviour was documented in a patch, however it seems that it hasn't been
|
||||||
membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE)?;
|
// upstreamed yet. Nevertheless, it clearly explains the guarantees that the Linux kernel
|
||||||
|
// provides us regarding the membarrier interface, and how to use it for JIT contexts.
|
||||||
|
// https://lkml.kernel.org/lkml/07a8b963002cb955b7516e61bad19514a3acaa82.1623813516.git.luto@kernel.org/
|
||||||
|
//
|
||||||
|
// I couldn't find the follow up for that patch but there doesn't seem to be disagreement
|
||||||
|
// about that specific part in the replies.
|
||||||
|
// TODO: Check if the kernel has updated the membarrier documentation
|
||||||
|
//
|
||||||
|
// See the following issues for more info:
|
||||||
|
// * https://github.com/bytecodealliance/wasmtime/pull/3426
|
||||||
|
// * https://github.com/bytecodealliance/wasmtime/pull/4997
|
||||||
|
//
|
||||||
|
// TODO: x86 and s390x have coherent caches so they don't need this, but RISCV does not
|
||||||
|
// guarantee that, so we may need to do something similar for it. However as noted in the
|
||||||
|
// above kernel patch the SYNC_CORE membarrier has different guarantees on each
|
||||||
|
// architecture, so we need to follow up and check what it provides us.
|
||||||
|
// See: https://github.com/bytecodealliance/wasmtime/issues/5033
|
||||||
|
match membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
|
||||||
|
Ok(_) => {}
|
||||||
|
|
||||||
|
// EPERM happens if the calling process hasn't yet called the register membarrier.
|
||||||
|
// We can call the register membarrier now, and then retry the actual membarrier,
|
||||||
|
//
|
||||||
|
// This does have some overhead since on the first time we call this function we
|
||||||
|
// actually execute three membarriers, but this only happens once per process and only
|
||||||
|
// one slow membarrier is actually executed (The last one, which actually generates an
|
||||||
|
// IPI).
|
||||||
|
Err(e) if e.raw_os_error().unwrap() == EPERM => {
|
||||||
|
membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)?;
|
||||||
|
membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// On kernels older than 4.16 the above syscall does not exist, so we can
|
||||||
|
// fallback to MEMBARRIER_CMD_GLOBAL which is an alias for MEMBARRIER_CMD_SHARED
|
||||||
|
// that has existed since 4.3. GLOBAL is a lot slower, but allows us to have
|
||||||
|
// compatibility with older kernels.
|
||||||
|
Err(e) if e.raw_os_error().unwrap() == EINVAL => {
|
||||||
|
membarrier(MEMBARRIER_CMD_GLOBAL)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// In any other case we got an actual error, so let's propagate that up.
|
||||||
|
e => e?,
|
||||||
}
|
}
|
||||||
|
|
||||||
// On kernels older than 4.16 the above syscall does not exist, so we can
|
|
||||||
// fallback to MEMBARRIER_CMD_GLOBAL which is an alias for MEMBARRIER_CMD_SHARED
|
|
||||||
// that has existed since 4.3. GLOBAL is a lot slower, but allows us to have
|
|
||||||
// compatibility with older kernels.
|
|
||||||
Err(e) if e.raw_os_error().unwrap() == EINVAL => {
|
|
||||||
membarrier(MEMBARRIER_CMD_GLOBAL)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// In any other case we got an actual error, so lets propagate that up
|
|
||||||
e => e?,
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
|
||||||
fn membarrier(barrier: libc::c_int) -> Result<()> {
|
|
||||||
let flags: libc::c_int = 0;
|
|
||||||
let res = unsafe { syscall(libc::SYS_membarrier, barrier, flags) };
|
|
||||||
if res == 0 {
|
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
}
|
||||||
Err(Error::last_os_error())
|
|
||||||
|
fn membarrier(barrier: libc::c_int) -> Result<()> {
|
||||||
|
let flags: libc::c_int = 0;
|
||||||
|
let res = unsafe { syscall(libc::SYS_membarrier, barrier, flags) };
|
||||||
|
if res == 0 {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(Error::last_os_error())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(not(all(
|
||||||
|
target_arch = "aarch64",
|
||||||
|
any(target_os = "linux", target_os = "android")
|
||||||
|
)))]
|
||||||
|
mod details {
|
||||||
|
pub(crate) fn pipeline_flush_mt() -> std::io::Result<()> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) use details::*;
|
||||||
|
|
||||||
/// See docs on [crate::clear_cache] for a description of what this function is trying to do.
|
/// See docs on [crate::clear_cache] for a description of what this function is trying to do.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub(crate) fn clear_cache(_ptr: *const c_void, _len: usize) -> Result<()> {
|
pub(crate) fn clear_cache(_ptr: *const c_void, _len: usize) -> Result<()> {
|
||||||
// TODO: On AArch64 we currently rely on the `mprotect` call that switches the memory from W+R to R+X
|
// TODO: On AArch64 we currently rely on the `mprotect` call that switches the memory from W+R
|
||||||
// to do this for us, however that is an implementation detail and should not be relied upon
|
// to R+X to do this for us, however that is an implementation detail and should not be relied
|
||||||
// We should call some implementation of `clear_cache` here
|
// upon.
|
||||||
|
// We should call some implementation of `clear_cache` here.
|
||||||
//
|
//
|
||||||
// See: https://github.com/bytecodealliance/wasmtime/issues/3310
|
// See: https://github.com/bytecodealliance/wasmtime/issues/3310
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user