Resolve libcall relocations for older CPUs (#5567)

* Resolve libcall relocations for older CPUs

Long ago Wasmtime used to have logic for resolving relocations
post-compilation for libcalls which I ended up removing during
refactorings last year. As #5563 points out, however, it's possible to
get Wasmtime to panic by disabling SSE features, which forces Cranelift
to use libcalls for some floating-point operations instead. Note that
this also requires disabling SIMD because SIMD support has a baseline of
SSE 4.2.

This commit pulls back the old implementations of various libcalls and
reimplements the logic necessary to have them work on CPUs without SSE 4.2.

Closes #5563

* Fix log message in `wast` support

* Fix offset listed in relocations

Be sure to factor in the offset of the function itself

* Review comments
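
For reference, the panic could be reached with a configuration along these
lines (a sketch, not part of this commit; it assumes the unsafe
`Config::cranelift_flag_set` API available in Wasmtime at the time and an
x86_64 host):

use wasmtime::{Config, Engine, Module};

fn main() -> anyhow::Result<()> {
    let mut config = Config::new();
    // Without SIMD the SSE 4.2 baseline no longer applies...
    config.wasm_simd(false);
    // ...so the scalar rounding instructions (SSE 4.1) can be disabled,
    // forcing Cranelift to lower `f32.ceil` and friends to libcalls.
    unsafe {
        config.cranelift_flag_set("has_sse41", "false");
        config.cranelift_flag_set("has_sse42", "false");
    }
    let engine = Engine::new(&config)?;
    // Compiling this module used to panic on an unresolved libcall
    // relocation; with this commit it compiles and runs.
    Module::new(
        &engine,
        "(module (func (param f32) (result f32) local.get 0 f32.ceil))",
    )?;
    Ok(())
}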
Author: Alex Crichton
Date: 2023-01-18 09:04:10 -06:00
Committed by: GitHub
Parent: 94b51cdb17
Commit: 9b896d2a70

7 changed files with 298 additions and 14 deletions


@@ -15,6 +15,8 @@
use crate::{CompiledFunction, RelocationTarget};
use anyhow::Result;
use cranelift_codegen::binemit::Reloc;
use cranelift_codegen::ir::LibCall;
use cranelift_codegen::isa::{
    unwind::{systemv, UnwindInfo},
    TargetIsa,
@@ -24,6 +26,7 @@ use gimli::write::{Address, EhFrame, EndianVec, FrameTable, Writer};
use gimli::RunTimeEndian;
use object::write::{Object, SectionId, StandardSegment, Symbol, SymbolId, SymbolSection};
use object::{Architecture, SectionKind, SymbolFlags, SymbolKind, SymbolScope};
use std::collections::HashMap;
use std::convert::TryFrom;
use std::ops::Range;
use wasmtime_environ::FuncIndex;
@@ -52,6 +55,13 @@ pub struct ModuleTextBuilder<'a> {
    /// In-progress text section that we're using cranelift's `MachBuffer` to
    /// build to resolve relocations (calls) between functions.
    text: Box<dyn TextSectionBuilder>,

    /// Symbols defined in the object for libcalls that relocations are applied
    /// against.
    ///
    /// Note that this isn't typically used. It's only used for SSE-disabled
    /// builds without SIMD on x86_64 right now.
    libcall_symbols: HashMap<LibCall, SymbolId>,
}

impl<'a> ModuleTextBuilder<'a> {
@@ -76,6 +86,7 @@ impl<'a> ModuleTextBuilder<'a> {
            text_section,
            unwind_info: Default::default(),
            text: isa.text_section_builder(num_funcs),
            libcall_symbols: HashMap::default(),
        }
    }
@@ -146,13 +157,49 @@ impl<'a> ModuleTextBuilder<'a> {
                );
            }

            // Relocations against libcalls are not common at this time and
            // are only used in non-default configurations that disable wasm
            // SIMD, disable SSE features, and for wasm modules that still
            // use floating point operations.
            //
            // Currently these relocations are all expected to be absolute
            // 8-byte relocations so that's asserted here and then encoded
            // directly into the object as a normal object relocation. This
            // is processed at module load time to resolve the relocations.
            RelocationTarget::LibCall(call) => {
                let symbol = *self.libcall_symbols.entry(call).or_insert_with(|| {
                    self.obj.add_symbol(Symbol {
                        name: libcall_name(call).as_bytes().to_vec(),
                        value: 0,
                        size: 0,
                        kind: SymbolKind::Text,
                        scope: SymbolScope::Linkage,
                        weak: false,
                        section: SymbolSection::Undefined,
                        flags: SymbolFlags::None,
                    })
                });
                let (encoding, kind, size) = match r.reloc {
                    Reloc::Abs8 => (
                        object::RelocationEncoding::Generic,
                        object::RelocationKind::Absolute,
                        8,
                    ),
                    other => unimplemented!("unimplemented relocation kind {other:?}"),
                };
                self.obj
                    .add_relocation(
                        self.text_section,
                        object::write::Relocation {
                            symbol,
                            size,
                            kind,
                            encoding,
                            offset: off + u64::from(r.offset),
                            addend: r.addend,
                        },
                    )
                    .unwrap();
            }
        };
    }
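
One subtlety in the `offset` field above, and the subject of the later "Fix
offset listed in relocations" message: `r.offset` is relative to the start of
the function being appended, so the function's placement `off` within the text
section must be factored in. A worked example with illustrative numbers:

// Function body placed at offset 0x140 of the text section, with an
// Abs8 relocation 0x10 bytes into that function:
let off: u64 = 0x140;     // where this function starts in .text
let r_offset: u32 = 0x10; // relocation offset within the function
assert_eq!(off + u64::from(r_offset), 0x150); // offset recorded in the object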
@@ -486,3 +533,19 @@ impl<'a> UnwindInfoBuilder<'a> {
        }
    }
}

fn libcall_name(call: LibCall) -> &'static str {
    use wasmtime_environ::obj::LibCall as LC;
    let other = match call {
        LibCall::FloorF32 => LC::FloorF32,
        LibCall::FloorF64 => LC::FloorF64,
        LibCall::NearestF32 => LC::NearestF32,
        LibCall::NearestF64 => LC::NearestF64,
        LibCall::CeilF32 => LC::CeilF32,
        LibCall::CeilF64 => LC::CeilF64,
        LibCall::TruncF32 => LC::TruncF32,
        LibCall::TruncF64 => LC::TruncF64,
        _ => panic!("unknown libcall to give a name to: {call:?}"),
    };
    other.symbol()
}


@@ -48,7 +48,7 @@ pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap";
/// encodes the ability to map an offset in the text section to the trap code
/// that it corresponds to.
///
/// This section is used at runtime to determine what flavor of trap happened to
/// ensure that embedders and debuggers know the reason for the wasm trap. The
/// encoding of this section is custom to Wasmtime and managed with helpers in
/// the `object` crate:
@@ -63,7 +63,7 @@ pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap";
///
/// This section is decoded by `lookup_trap_code` below which will read the
/// section count, slice some bytes to get the various arrays, and then perform
/// a binary search on the offsets array to find the index corresponding to
/// the pc being looked up. If found the same index in the trap array (the array
/// of bytes) is the trap code for that offset.
///
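
A minimal sketch of that lookup, assuming a sorted offsets array parallel to
the trap-code byte array (names are hypothetical; the real decoder is
`lookup_trap_code`):

fn trap_for_pc(offsets: &[u32], traps: &[u8], text_offset: u32) -> Option<u8> {
    // Binary search the sorted offsets array; a hit at `index` selects
    // the trap code byte recorded for that program counter.
    let index = offsets.binary_search(&text_offset).ok()?;
    Some(traps[index])
}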
@@ -128,3 +128,42 @@ pub const ELF_NAME_DATA: &'static str = ".name.wasm";
/// and is instead indexed directly by relative indices stored in compilation
/// metadata.
pub const ELF_WASMTIME_DWARF: &str = ".wasmtime.dwarf";

macro_rules! libcalls {
    ($($rust:ident = $sym:tt)*) => (
        #[allow(missing_docs)]
        pub enum LibCall {
            $($rust,)*
        }

        impl LibCall {
            /// Returns the libcall corresponding to the provided symbol name,
            /// if one matches.
            pub fn from_str(s: &str) -> Option<LibCall> {
                match s {
                    $($sym => Some(LibCall::$rust),)*
                    _ => None,
                }
            }

            /// Returns the symbol name in object files associated with this
            /// libcall.
            pub fn symbol(&self) -> &'static str {
                match self {
                    $(LibCall::$rust => $sym,)*
                }
            }
        }
    )
}

libcalls! {
    FloorF32 = "libcall_floor32"
    FloorF64 = "libcall_floor64"
    NearestF32 = "libcall_nearestf32"
    NearestF64 = "libcall_nearestf64"
    CeilF32 = "libcall_ceilf32"
    CeilF64 = "libcall_ceilf64"
    TruncF32 = "libcall_truncf32"
    TruncF64 = "libcall_truncf64"
}
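
For a single variant the macro above expands to roughly the following (a
sketch of the generated shape, not a verbatim expansion):

#[allow(missing_docs)]
pub enum LibCall {
    NearestF32,
    // ...one variant per `$rust` in the invocation...
}

impl LibCall {
    pub fn from_str(s: &str) -> Option<LibCall> {
        match s {
            "libcall_nearestf32" => Some(LibCall::NearestF32),
            // ...one arm per symbol...
            _ => None,
        }
    }

    pub fn symbol(&self) -> &'static str {
        match self {
            LibCall::NearestF32 => "libcall_nearestf32",
            // ...one arm per variant...
        }
    }
}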


@@ -1,6 +1,8 @@
//! Generate Cranelift compiler settings.

use crate::generators::ModuleConfig;
use arbitrary::{Arbitrary, Unstructured};
use std::collections::HashMap;

/// Choose between matching the host architecture or a cross-compilation target.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
@@ -32,6 +34,42 @@ impl CodegenSettings {
        }
    }

    /// Features such as sse4.2 are unconditionally enabled on the x86_64
    /// target because they are hard required for SIMD, but when SIMD is
    /// disabled, for example, we support disabling these features.
    ///
    /// This method will take the wasm feature selection chosen, through
    /// `module_config`, and possibly try to disable some more features by
    /// reading more of the input.
    pub fn maybe_disable_more_features(
        &mut self,
        module_config: &ModuleConfig,
        u: &mut Unstructured<'_>,
    ) -> arbitrary::Result<()> {
        let flags = match self {
            CodegenSettings::Target { flags, .. } => flags,
            _ => return Ok(()),
        };
        if !module_config.config.simd_enabled {
            // Note that these booleans are generated regardless of
            // architecture so the same fuzz input is consumed everywhere;
            // that way a test case failure unrelated to codegen settings
            // which fails on one architecture fails on the others as well.
            let new_flags = ["has_sse3", "has_ssse3", "has_sse41", "has_sse42"]
                .into_iter()
                .map(|name| Ok((name, u.arbitrary()?)))
                .collect::<arbitrary::Result<HashMap<_, bool>>>()?;
            for (name, val) in flags {
                if let Some(new_value) = new_flags.get(name.as_str()) {
                    *val = new_value.to_string();
                }
            }
        }
        Ok(())
    }
}
impl<'a> Arbitrary<'a> for CodegenSettings {
@@ -103,6 +141,9 @@ impl<'a> Arbitrary<'a> for CodegenSettings {
                // fail if these features are disabled, so unconditionally
                // enable them as we're not interested in fuzzing without
                // them.
                //
                // Note that these may still be disabled above in
                // `maybe_disable_more_features`.
                std:"sse3" => clif:"has_sse3" ratio: 1 in 1,
                std:"ssse3" => clif:"has_ssse3" ratio: 1 in 1,
                std:"sse4.1" => clif:"has_sse41" ratio: 1 in 1,


@@ -294,6 +294,11 @@ impl<'a> Arbitrary<'a> for Config {
            module_config: u.arbitrary()?,
        };

        config
            .wasmtime
            .codegen
            .maybe_disable_more_features(&config.module_config, u)?;

        // If using the pooling allocator, constrain the memory and module configurations
        // to the module limits.
        if let InstanceAllocationStrategy::Pooling(pooling) = &mut config.wasmtime.strategy {


@@ -4,12 +4,14 @@ use crate::subslice_range;
use crate::unwind::UnwindRegistration;
use anyhow::{anyhow, bail, Context, Result};
use object::read::{File, Object, ObjectSection};
use object::ObjectSymbol;
use std::mem;
use std::mem::ManuallyDrop;
use std::ops::Range;
use wasmtime_environ::obj;
use wasmtime_environ::FunctionLoc;
use wasmtime_jit_icache_coherence as icache_coherence;
use wasmtime_runtime::libcalls;
use wasmtime_runtime::{MmapVec, VMTrampoline};

/// Management of executable memory within a `MmapVec`
@@ -24,6 +26,8 @@ pub struct CodeMemory {
    published: bool,
    enable_branch_protection: bool,

    relocations: Vec<(usize, obj::LibCall)>,

    // Ranges within `self.mmap` of where the particular sections lie.
    text: Range<usize>,
    unwind: Range<usize>,
@@ -60,6 +64,7 @@ impl CodeMemory {
        let obj = File::parse(&mmap[..])
            .with_context(|| "failed to parse internal compilation artifact")?;

        let mut relocations = Vec::new();
        let mut text = 0..0;
        let mut unwind = 0..0;
        let mut enable_branch_protection = None;
@@ -93,11 +98,28 @@ impl CodeMemory {
".text" => { ".text" => {
text = range; text = range;
// Double-check there are no relocations in the text section. At // The text section might have relocations for things like
// this time relocations are not expected at all from loaded code // libcalls which need to be applied, so handle those here.
// since everything should be resolved at compile time. Handling //
// must be added here, though, if relocations pop up. // Note that only a small subset of possible relocations are
assert!(section.relocations().count() == 0); // handled. Only those required by the compiler side of
// things are processed.
for (offset, reloc) in section.relocations() {
assert_eq!(reloc.kind(), object::RelocationKind::Absolute);
assert_eq!(reloc.encoding(), object::RelocationEncoding::Generic);
assert_eq!(usize::from(reloc.size()), std::mem::size_of::<usize>());
assert_eq!(reloc.addend(), 0);
let sym = match reloc.target() {
object::RelocationTarget::Symbol(id) => id,
other => panic!("unknown relocation target {other:?}"),
};
let sym = obj.symbol_by_index(sym).unwrap().name().unwrap();
let libcall = obj::LibCall::from_str(sym)
.unwrap_or_else(|| panic!("unknown symbol relocation: {sym}"));
let offset = usize::try_from(offset).unwrap();
relocations.push((offset, libcall));
}
} }
UnwindRegistration::SECTION_NAME => unwind = range, UnwindRegistration::SECTION_NAME => unwind = range,
obj::ELF_WASM_DATA => wasm_data = range, obj::ELF_WASM_DATA => wasm_data = range,
@@ -124,6 +146,7 @@ impl CodeMemory {
            dwarf,
            info_data,
            wasm_data,
            relocations,
        })
    }
@@ -214,6 +237,8 @@ impl CodeMemory {
        // both the actual unwinding tables as well as the validity of the
        // pointers we pass in itself.
        unsafe {
            self.apply_relocations()?;

            let text = self.text();

            // Clear the newly allocated code from cache if the processor requires it
@@ -243,6 +268,35 @@ impl CodeMemory {
        Ok(())
    }

    unsafe fn apply_relocations(&mut self) -> Result<()> {
        if self.relocations.is_empty() {
            return Ok(());
        }

        // Mmaps currently all start as readonly so before updating relocations
        // the mapping needs to be made writable first. Note that this isn't
        // reset back to readonly since the `make_executable` call, which
        // happens after this, will implicitly remove the writable bit and
        // leave it as just read/execute.
        self.mmap.make_writable(self.text.clone())?;

        for (offset, libcall) in self.relocations.iter() {
            let offset = self.text.start + offset;
            let libcall = match libcall {
                obj::LibCall::FloorF32 => libcalls::relocs::floorf32 as usize,
                obj::LibCall::FloorF64 => libcalls::relocs::floorf64 as usize,
                obj::LibCall::NearestF32 => libcalls::relocs::nearestf32 as usize,
                obj::LibCall::NearestF64 => libcalls::relocs::nearestf64 as usize,
                obj::LibCall::CeilF32 => libcalls::relocs::ceilf32 as usize,
                obj::LibCall::CeilF64 => libcalls::relocs::ceilf64 as usize,
                obj::LibCall::TruncF32 => libcalls::relocs::truncf32 as usize,
                obj::LibCall::TruncF64 => libcalls::relocs::truncf64 as usize,
            };
            *self.mmap.as_mut_ptr().add(offset).cast::<usize>() = libcall;
        }
        Ok(())
    }

    unsafe fn register_unwind_info(&mut self) -> Result<()> {
        if self.unwind.len() == 0 {
            return Ok(());
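
What `apply_relocations` writes can be shown in isolation: an Abs8 relocation
is just an 8-byte slot that receives the absolute address of the libcall,
which generated code then calls indirectly. A self-contained sketch, assuming
a 64-bit host (not part of the commit):

extern "C" fn ceilf32(f: f32) -> f32 {
    f.ceil()
}

fn main() {
    // Pretend this is a text section with a zeroed 8-byte Abs8 slot at
    // offset 8 that generated code loads and calls through.
    let mut fake_text = [0u8; 16];
    fake_text[8..16].copy_from_slice(&(ceilf32 as usize).to_ne_bytes());

    // A loader reads the slot back and calls through it.
    let slot = usize::from_ne_bytes(fake_text[8..16].try_into().unwrap());
    let f: extern "C" fn(f32) -> f32 = unsafe { std::mem::transmute(slot) };
    assert_eq!(f(1.25), 2.0);
}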


@@ -492,3 +492,85 @@ unsafe fn out_of_gas(vmctx: *mut VMContext) -> Result<()> {
unsafe fn new_epoch(vmctx: *mut VMContext) -> Result<u64> {
    (*(*vmctx).instance().store()).new_epoch()
}
/// This module contains functions which are used for resolving relocations at
/// runtime if necessary.
///
/// These functions are not used by default and currently the only platform
/// they're used for is on x86_64 when SIMD is disabled and then SSE features
/// are further disabled. In these configurations Cranelift isn't allowed to use
/// native CPU instructions so it falls back to libcalls, and we rely on the
/// Rust standard library generally for implementing these.
#[allow(missing_docs)]
pub mod relocs {
    pub extern "C" fn floorf32(f: f32) -> f32 {
        f.floor()
    }

    pub extern "C" fn floorf64(f: f64) -> f64 {
        f.floor()
    }

    pub extern "C" fn ceilf32(f: f32) -> f32 {
        f.ceil()
    }

    pub extern "C" fn ceilf64(f: f64) -> f64 {
        f.ceil()
    }

    pub extern "C" fn truncf32(f: f32) -> f32 {
        f.trunc()
    }

    pub extern "C" fn truncf64(f: f64) -> f64 {
        f.trunc()
    }

    const TOINT_32: f32 = 1.0 / f32::EPSILON;
    const TOINT_64: f64 = 1.0 / f64::EPSILON;

    // NB: replace with `round_ties_even` from libstd when it's stable as
    // tracked by rust-lang/rust#96710
    pub extern "C" fn nearestf32(x: f32) -> f32 {
        // Rust doesn't yet have a stable round-to-nearest, ties-to-even
        // function, so do it manually: the result is either ceil or floor,
        // whichever is nearest, with ties broken toward even. This approach
        // exploits the default round-half-to-even rounding mode.
        let i = x.to_bits();
        let e = i >> 23 & 0xff;
        if e >= 0x7f_u32 + 23 {
            // Check for NaNs.
            if e == 0xff {
                // Read the 23-bit significand.
                if i & 0x7fffff != 0 {
                    // Ensure it's arithmetic by setting the significand's most
                    // significant bit to 1; it also works for canonical NaNs.
                    return f32::from_bits(i | (1 << 22));
                }
            }
            x
        } else {
            (x.abs() + TOINT_32 - TOINT_32).copysign(x)
        }
    }

    pub extern "C" fn nearestf64(x: f64) -> f64 {
        let i = x.to_bits();
        let e = i >> 52 & 0x7ff;
        if e >= 0x3ff_u64 + 52 {
            // Check for NaNs.
            if e == 0x7ff {
                // Read the 52-bit significand.
                if i & 0xfffffffffffff != 0 {
                    // Ensure it's arithmetic by setting the significand's most
                    // significant bit to 1; it also works for canonical NaNs.
                    return f64::from_bits(i | (1 << 51));
                }
            }
            x
        } else {
            (x.abs() + TOINT_64 - TOINT_64).copysign(x)
        }
    }
}
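
The TOINT trick above relies on the default round-to-nearest-even FP mode:
adding and then subtracting 2^23 (or 2^52) forces rounding at integer
granularity. A sketch of a unit test for the tie cases (hypothetical; not part
of the commit):

#[cfg(test)]
mod relocs_tests {
    use super::relocs::{nearestf32, nearestf64};

    #[test]
    fn ties_round_to_even() {
        // Halfway values round to the nearest even integer.
        assert_eq!(nearestf32(0.5), 0.0);
        assert_eq!(nearestf32(1.5), 2.0);
        assert_eq!(nearestf32(2.5), 2.0);
        assert_eq!(nearestf32(-1.5), -2.0);
        assert_eq!(nearestf64(4.5), 4.0);
        assert_eq!(nearestf64(5.5), 6.0);
    }
}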


@@ -363,7 +363,7 @@ impl<T> WastContext<T> {
        let sp = directive.span();
        if log::log_enabled!(log::Level::Debug) {
            let (line, col) = sp.linecol_in(wast);
            log::debug!("running directive on {}:{}:{}", filename, line + 1, col);
        }
        self.run_directive(directive)
            .map_err(|e| match e.downcast() {