Resolve libcall relocations for older CPUs (#5567)
* Resolve libcall relocations for older CPUs Long ago Wasmtime used to have logic for resolving relocations post-compilation for libcalls which I ended up removing during refactorings last year. As #5563 points out, however, it's possible to get Wasmtime to panic by disabling SSE features which forces Cranelift to use libcalls for some floating-point operations instead. Note that this also requires disabling SIMD because SIMD support has a baseline of SSE 4.2. This commit pulls back the old implementations of various libcalls and reimplements logic necessary to have them work on CPUs without SSE 4.2 Closes #5563 * Fix log message in `wast` support * Fix offset listed in relocations Be sure to factor in the offset of the function itself * Review comments
This commit is contained in:
@@ -15,6 +15,8 @@
|
||||
|
||||
use crate::{CompiledFunction, RelocationTarget};
|
||||
use anyhow::Result;
|
||||
use cranelift_codegen::binemit::Reloc;
|
||||
use cranelift_codegen::ir::LibCall;
|
||||
use cranelift_codegen::isa::{
|
||||
unwind::{systemv, UnwindInfo},
|
||||
TargetIsa,
|
||||
@@ -24,6 +26,7 @@ use gimli::write::{Address, EhFrame, EndianVec, FrameTable, Writer};
|
||||
use gimli::RunTimeEndian;
|
||||
use object::write::{Object, SectionId, StandardSegment, Symbol, SymbolId, SymbolSection};
|
||||
use object::{Architecture, SectionKind, SymbolFlags, SymbolKind, SymbolScope};
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::ops::Range;
|
||||
use wasmtime_environ::FuncIndex;
|
||||
@@ -52,6 +55,13 @@ pub struct ModuleTextBuilder<'a> {
|
||||
/// In-progress text section that we're using cranelift's `MachBuffer` to
|
||||
/// build to resolve relocations (calls) between functions.
|
||||
text: Box<dyn TextSectionBuilder>,
|
||||
|
||||
/// Symbols defined in the object for libcalls that relocations are applied
|
||||
/// against.
|
||||
///
|
||||
/// Note that this isn't typically used. It's only used for SSE-disabled
|
||||
/// builds without SIMD on x86_64 right now.
|
||||
libcall_symbols: HashMap<LibCall, SymbolId>,
|
||||
}
|
||||
|
||||
impl<'a> ModuleTextBuilder<'a> {
|
||||
@@ -76,6 +86,7 @@ impl<'a> ModuleTextBuilder<'a> {
|
||||
text_section,
|
||||
unwind_info: Default::default(),
|
||||
text: isa.text_section_builder(num_funcs),
|
||||
libcall_symbols: HashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,13 +157,49 @@ impl<'a> ModuleTextBuilder<'a> {
|
||||
);
|
||||
}
|
||||
|
||||
// At this time it's not expected that any libcall relocations
|
||||
// are generated. Ideally we don't want relocations against
|
||||
// libcalls anyway as libcalls should go through indirect
|
||||
// `VMContext` tables to avoid needing to apply relocations at
|
||||
// module-load time as well.
|
||||
// Relocations against libcalls are not common at this time and
|
||||
// are only used in non-default configurations that disable wasm
|
||||
// SIMD, disable SSE features, and for wasm modules that still
|
||||
// use floating point operations.
|
||||
//
|
||||
// Currently these relocations are all expected to be absolute
|
||||
// 8-byte relocations so that's asserted here and then encoded
|
||||
// directly into the object as a normal object relocation. This
|
||||
// is processed at module load time to resolve the relocations.
|
||||
RelocationTarget::LibCall(call) => {
|
||||
unimplemented!("cannot generate relocation against libcall {call:?}");
|
||||
let symbol = *self.libcall_symbols.entry(call).or_insert_with(|| {
|
||||
self.obj.add_symbol(Symbol {
|
||||
name: libcall_name(call).as_bytes().to_vec(),
|
||||
value: 0,
|
||||
size: 0,
|
||||
kind: SymbolKind::Text,
|
||||
scope: SymbolScope::Linkage,
|
||||
weak: false,
|
||||
section: SymbolSection::Undefined,
|
||||
flags: SymbolFlags::None,
|
||||
})
|
||||
});
|
||||
let (encoding, kind, size) = match r.reloc {
|
||||
Reloc::Abs8 => (
|
||||
object::RelocationEncoding::Generic,
|
||||
object::RelocationKind::Absolute,
|
||||
8,
|
||||
),
|
||||
other => unimplemented!("unimplemented relocation kind {other:?}"),
|
||||
};
|
||||
self.obj
|
||||
.add_relocation(
|
||||
self.text_section,
|
||||
object::write::Relocation {
|
||||
symbol,
|
||||
size,
|
||||
kind,
|
||||
encoding,
|
||||
offset: off + u64::from(r.offset),
|
||||
addend: r.addend,
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -486,3 +533,19 @@ impl<'a> UnwindInfoBuilder<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn libcall_name(call: LibCall) -> &'static str {
|
||||
use wasmtime_environ::obj::LibCall as LC;
|
||||
let other = match call {
|
||||
LibCall::FloorF32 => LC::FloorF32,
|
||||
LibCall::FloorF64 => LC::FloorF64,
|
||||
LibCall::NearestF32 => LC::NearestF32,
|
||||
LibCall::NearestF64 => LC::NearestF64,
|
||||
LibCall::CeilF32 => LC::CeilF32,
|
||||
LibCall::CeilF64 => LC::CeilF64,
|
||||
LibCall::TruncF32 => LC::TruncF32,
|
||||
LibCall::TruncF64 => LC::TruncF64,
|
||||
_ => panic!("unknown libcall to give a name to: {call:?}"),
|
||||
};
|
||||
other.symbol()
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap";
|
||||
/// encodes the ability to map an offset in the text section to the trap code
|
||||
/// that it corresponds to.
|
||||
///
|
||||
/// This section is used at runtime to determine what flavor fo trap happened to
|
||||
/// This section is used at runtime to determine what flavor of trap happened to
|
||||
/// ensure that embedders and debuggers know the reason for the wasm trap. The
|
||||
/// encoding of this section is custom to Wasmtime and managed with helpers in
|
||||
/// the `object` crate:
|
||||
@@ -63,7 +63,7 @@ pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap";
|
||||
///
|
||||
/// This section is decoded by `lookup_trap_code` below which will read the
|
||||
/// section count, slice some bytes to get the various arrays, and then perform
|
||||
/// a binary search on the offsets array to find the an index corresponding to
|
||||
/// a binary search on the offsets array to find the index corresponding to
|
||||
/// the pc being looked up. If found the same index in the trap array (the array
|
||||
/// of bytes) is the trap code for that offset.
|
||||
///
|
||||
@@ -128,3 +128,42 @@ pub const ELF_NAME_DATA: &'static str = ".name.wasm";
|
||||
/// and is instead indexed directly by relative indices stored in compilation
|
||||
/// metadata.
|
||||
pub const ELF_WASMTIME_DWARF: &str = ".wasmtime.dwarf";
|
||||
|
||||
/// Generates the `LibCall` enumeration together with bidirectional
/// conversions between variants and the symbol-name strings embedded in
/// compiled object files.
///
/// Each `$variant = $name` pair declares one enum variant plus its stable
/// symbol string; the generated `from_str` and `symbol` methods are exact
/// inverses of one another.
macro_rules! libcalls {
    ($($variant:ident = $name:tt)*) => {
        #[allow(missing_docs)]
        pub enum LibCall {
            $($variant,)*
        }

        impl LibCall {
            /// Returns the libcall corresponding to the provided symbol name,
            /// if one matches.
            pub fn from_str(s: &str) -> Option<LibCall> {
                match s {
                    $($name => Some(LibCall::$variant),)*
                    _ => None,
                }
            }

            /// Returns the symbol name in object files associated with this
            /// libcall.
            pub fn symbol(&self) -> &'static str {
                match self {
                    $(LibCall::$variant => $name,)*
                }
            }
        }
    };
}

// NOTE: these symbol strings are recorded in serialized compilation
// artifacts, so they must stay stable for previously-compiled modules to
// keep loading.
libcalls! {
    FloorF32 = "libcall_floor32"
    FloorF64 = "libcall_floor64"
    NearestF32 = "libcall_nearestf32"
    NearestF64 = "libcall_nearestf64"
    CeilF32 = "libcall_ceilf32"
    CeilF64 = "libcall_ceilf64"
    TruncF32 = "libcall_truncf32"
    TruncF64 = "libcall_truncf64"
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
//! Generate Cranelift compiler settings.
|
||||
|
||||
use crate::generators::ModuleConfig;
|
||||
use arbitrary::{Arbitrary, Unstructured};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Choose between matching the host architecture or a cross-compilation target.
|
||||
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
|
||||
@@ -32,6 +34,42 @@ impl CodegenSettings {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Features such as sse4.2 are unconditionally enabled on the x86_64 target
|
||||
/// because they are hard required for SIMD, but when SIMD is disabled, for
|
||||
/// example, we support disabling these features.
|
||||
///
|
||||
/// This method will take the wasm feature selection chosen, through
|
||||
/// `module_config`, and possibly try to disable some more features by
|
||||
/// reading more of the input.
|
||||
pub fn maybe_disable_more_features(
|
||||
&mut self,
|
||||
module_config: &ModuleConfig,
|
||||
u: &mut Unstructured<'_>,
|
||||
) -> arbitrary::Result<()> {
|
||||
let flags = match self {
|
||||
CodegenSettings::Target { flags, .. } => flags,
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
if !module_config.config.simd_enabled {
|
||||
// Note that regardless of architecture these booleans are generated
|
||||
// to have test case failures unrelated to codegen setting input
|
||||
// that fail on one architecture to fail on other architectures as
|
||||
// well.
|
||||
let new_flags = ["has_sse3", "has_ssse3", "has_sse41", "has_sse42"]
|
||||
.into_iter()
|
||||
.map(|name| Ok((name, u.arbitrary()?)))
|
||||
.collect::<arbitrary::Result<HashMap<_, bool>>>()?;
|
||||
|
||||
for (name, val) in flags {
|
||||
if let Some(new_value) = new_flags.get(name.as_str()) {
|
||||
*val = new_value.to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Arbitrary<'a> for CodegenSettings {
|
||||
@@ -103,6 +141,9 @@ impl<'a> Arbitrary<'a> for CodegenSettings {
|
||||
// fail if these features are disabled, so unconditionally
|
||||
// enable them as we're not interested in fuzzing without
|
||||
// them.
|
||||
//
|
||||
// Note that these may still be disabled above in
|
||||
// `maybe_disable_more_features`.
|
||||
std:"sse3" => clif:"has_sse3" ratio: 1 in 1,
|
||||
std:"ssse3" => clif:"has_ssse3" ratio: 1 in 1,
|
||||
std:"sse4.1" => clif:"has_sse41" ratio: 1 in 1,
|
||||
|
||||
@@ -294,6 +294,11 @@ impl<'a> Arbitrary<'a> for Config {
|
||||
module_config: u.arbitrary()?,
|
||||
};
|
||||
|
||||
config
|
||||
.wasmtime
|
||||
.codegen
|
||||
.maybe_disable_more_features(&config.module_config, u)?;
|
||||
|
||||
// If using the pooling allocator, constrain the memory and module configurations
|
||||
// to the module limits.
|
||||
if let InstanceAllocationStrategy::Pooling(pooling) = &mut config.wasmtime.strategy {
|
||||
|
||||
@@ -4,12 +4,14 @@ use crate::subslice_range;
|
||||
use crate::unwind::UnwindRegistration;
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use object::read::{File, Object, ObjectSection};
|
||||
use object::ObjectSymbol;
|
||||
use std::mem;
|
||||
use std::mem::ManuallyDrop;
|
||||
use std::ops::Range;
|
||||
use wasmtime_environ::obj;
|
||||
use wasmtime_environ::FunctionLoc;
|
||||
use wasmtime_jit_icache_coherence as icache_coherence;
|
||||
use wasmtime_runtime::libcalls;
|
||||
use wasmtime_runtime::{MmapVec, VMTrampoline};
|
||||
|
||||
/// Management of executable memory within a `MmapVec`
|
||||
@@ -24,6 +26,8 @@ pub struct CodeMemory {
|
||||
published: bool,
|
||||
enable_branch_protection: bool,
|
||||
|
||||
relocations: Vec<(usize, obj::LibCall)>,
|
||||
|
||||
// Ranges within `self.mmap` of where the particular sections lie.
|
||||
text: Range<usize>,
|
||||
unwind: Range<usize>,
|
||||
@@ -60,6 +64,7 @@ impl CodeMemory {
|
||||
let obj = File::parse(&mmap[..])
|
||||
.with_context(|| "failed to parse internal compilation artifact")?;
|
||||
|
||||
let mut relocations = Vec::new();
|
||||
let mut text = 0..0;
|
||||
let mut unwind = 0..0;
|
||||
let mut enable_branch_protection = None;
|
||||
@@ -93,11 +98,28 @@ impl CodeMemory {
|
||||
".text" => {
|
||||
text = range;
|
||||
|
||||
// Double-check there are no relocations in the text section. At
|
||||
// this time relocations are not expected at all from loaded code
|
||||
// since everything should be resolved at compile time. Handling
|
||||
// must be added here, though, if relocations pop up.
|
||||
assert!(section.relocations().count() == 0);
|
||||
// The text section might have relocations for things like
|
||||
// libcalls which need to be applied, so handle those here.
|
||||
//
|
||||
// Note that only a small subset of possible relocations are
|
||||
// handled. Only those required by the compiler side of
|
||||
// things are processed.
|
||||
for (offset, reloc) in section.relocations() {
|
||||
assert_eq!(reloc.kind(), object::RelocationKind::Absolute);
|
||||
assert_eq!(reloc.encoding(), object::RelocationEncoding::Generic);
|
||||
assert_eq!(usize::from(reloc.size()), std::mem::size_of::<usize>());
|
||||
assert_eq!(reloc.addend(), 0);
|
||||
let sym = match reloc.target() {
|
||||
object::RelocationTarget::Symbol(id) => id,
|
||||
other => panic!("unknown relocation target {other:?}"),
|
||||
};
|
||||
let sym = obj.symbol_by_index(sym).unwrap().name().unwrap();
|
||||
let libcall = obj::LibCall::from_str(sym)
|
||||
.unwrap_or_else(|| panic!("unknown symbol relocation: {sym}"));
|
||||
|
||||
let offset = usize::try_from(offset).unwrap();
|
||||
relocations.push((offset, libcall));
|
||||
}
|
||||
}
|
||||
UnwindRegistration::SECTION_NAME => unwind = range,
|
||||
obj::ELF_WASM_DATA => wasm_data = range,
|
||||
@@ -124,6 +146,7 @@ impl CodeMemory {
|
||||
dwarf,
|
||||
info_data,
|
||||
wasm_data,
|
||||
relocations,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -214,6 +237,8 @@ impl CodeMemory {
|
||||
// both the actual unwinding tables as well as the validity of the
|
||||
// pointers we pass in itself.
|
||||
unsafe {
|
||||
self.apply_relocations()?;
|
||||
|
||||
let text = self.text();
|
||||
|
||||
// Clear the newly allocated code from cache if the processor requires it
|
||||
@@ -243,6 +268,35 @@ impl CodeMemory {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Resolves any libcall relocations recorded when the object was parsed by
/// writing the addresses of the corresponding Rust implementations into
/// the text section.
///
/// # Safety
///
/// This patches the text section in place, so the caller must ensure the
/// memory isn't concurrently executed or read while patching (this appears
/// to run during publishing, before the text is made executable — TODO
/// confirm no other callers exist).
unsafe fn apply_relocations(&mut self) -> Result<()> {
    // Fast path: libcall relocations only appear in unusual configurations
    // (e.g. x86_64 with SSE features disabled), so this list is normally
    // empty and the mprotect dance below can be skipped entirely.
    if self.relocations.is_empty() {
        return Ok(());
    }

    // Mmaps currently all start as readonly so before updating relocations
    // the mapping needs to be made writable first. Note that this isn't
    // reset back to readonly since the `make_executable` call, which
    // happens after this, will implicitly remove the writable bit and leave
    // it as just read/execute.
    self.mmap.make_writable(self.text.clone())?;

    for (offset, libcall) in self.relocations.iter() {
        // Recorded offsets are relative to the start of the text section;
        // translate to an offset within the whole mmap.
        let offset = self.text.start + offset;
        // Resolve the libcall to the address of its Rust implementation.
        let libcall = match libcall {
            obj::LibCall::FloorF32 => libcalls::relocs::floorf32 as usize,
            obj::LibCall::FloorF64 => libcalls::relocs::floorf64 as usize,
            obj::LibCall::NearestF32 => libcalls::relocs::nearestf32 as usize,
            obj::LibCall::NearestF64 => libcalls::relocs::nearestf64 as usize,
            obj::LibCall::CeilF32 => libcalls::relocs::ceilf32 as usize,
            obj::LibCall::CeilF64 => libcalls::relocs::ceilf64 as usize,
            obj::LibCall::TruncF32 => libcalls::relocs::truncf32 as usize,
            obj::LibCall::TruncF64 => libcalls::relocs::truncf64 as usize,
        };
        // Each relocation was validated at parse time to be an absolute,
        // pointer-sized write with zero addend, so the resolved address is
        // stored directly at the relocation's location.
        *self.mmap.as_mut_ptr().add(offset).cast::<usize>() = libcall;
    }
    Ok(())
}
|
||||
|
||||
unsafe fn register_unwind_info(&mut self) -> Result<()> {
|
||||
if self.unwind.len() == 0 {
|
||||
return Ok(());
|
||||
|
||||
@@ -492,3 +492,85 @@ unsafe fn out_of_gas(vmctx: *mut VMContext) -> Result<()> {
|
||||
unsafe fn new_epoch(vmctx: *mut VMContext) -> Result<u64> {
|
||||
(*(*vmctx).instance().store()).new_epoch()
|
||||
}
|
||||
|
||||
/// Runtime implementations backing libcall relocations.
///
/// Nothing here is used by default: currently the only configuration that
/// references these functions is x86_64 with SIMD disabled and SSE
/// features further disabled, where Cranelift can't emit native float
/// instructions and falls back to libcalls. Wherever possible the Rust
/// standard library supplies the actual implementation.
#[allow(missing_docs)]
pub mod relocs {
    // Magic constants for the "add then subtract 2^precision" rounding
    // trick used by the `nearest*` functions below.
    const TOINT_32: f32 = 1.0 / f32::EPSILON;
    const TOINT_64: f64 = 1.0 / f64::EPSILON;

    pub extern "C" fn floorf32(val: f32) -> f32 {
        val.floor()
    }

    pub extern "C" fn floorf64(val: f64) -> f64 {
        val.floor()
    }

    pub extern "C" fn ceilf32(val: f32) -> f32 {
        val.ceil()
    }

    pub extern "C" fn ceilf64(val: f64) -> f64 {
        val.ceil()
    }

    pub extern "C" fn truncf32(val: f32) -> f32 {
        val.trunc()
    }

    pub extern "C" fn truncf64(val: f64) -> f64 {
        val.trunc()
    }

    // Round-to-nearest, ties-to-even. The standard library has no stable
    // equivalent yet (`round_ties_even` is tracked by rust-lang/rust#96710)
    // so the rounding is done manually: magnitudes below 2^23 (f32) /
    // 2^52 (f64) are rounded via the `+ TOINT - TOINT` dance, which lets
    // the default round-half-to-even FP mode do the work; larger finite
    // values are already integral, and NaNs are quieted explicitly.
    pub extern "C" fn nearestf32(val: f32) -> f32 {
        let bits = val.to_bits();
        let exponent = bits >> 23 & 0xff;
        if exponent < 0x7f_u32 + 23 {
            // Small enough that rounding through TOINT_32 discards the
            // fraction; `copysign` restores the sign for negative inputs
            // (including results that round to -0.0).
            return (val.abs() + TOINT_32 - TOINT_32).copysign(val);
        }
        if exponent == 0xff && bits & 0x7fffff != 0 {
            // NaN: ensure it's arithmetic by setting the significand's
            // most significant bit; canonical NaNs stay canonical.
            return f32::from_bits(bits | (1 << 22));
        }
        // Infinities and values with magnitude >= 2^23 are already
        // integral.
        val
    }

    pub extern "C" fn nearestf64(val: f64) -> f64 {
        let bits = val.to_bits();
        let exponent = bits >> 52 & 0x7ff;
        if exponent < 0x3ff_u64 + 52 {
            return (val.abs() + TOINT_64 - TOINT_64).copysign(val);
        }
        if exponent == 0x7ff && bits & 0xfffffffffffff != 0 {
            // Quiet the NaN by setting the significand's top bit.
            return f64::from_bits(bits | (1 << 51));
        }
        val
    }
}
|
||||
|
||||
@@ -363,7 +363,7 @@ impl<T> WastContext<T> {
|
||||
let sp = directive.span();
|
||||
if log::log_enabled!(log::Level::Debug) {
|
||||
let (line, col) = sp.linecol_in(wast);
|
||||
log::debug!("failed directive on {}:{}:{}", filename, line + 1, col);
|
||||
log::debug!("running directive on {}:{}:{}", filename, line + 1, col);
|
||||
}
|
||||
self.run_directive(directive)
|
||||
.map_err(|e| match e.downcast() {
|
||||
|
||||
Reference in New Issue
Block a user