From f21aa98ccb111698d6251db0f96857f57bb026c3 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 9 Mar 2022 12:58:27 -0600 Subject: [PATCH] Fuzz-code-coverage motivated improvements (#3905) * fuzz: Fuzz padding between compiled functions This commit hooks up the custom `wasmtime_linkopt_padding_between_functions` configuration option to the cranelift compiler into the fuzz configuration, enabling us to ensure that randomly inserting a moderate amount of padding between functions shouldn't tamper with any results. * fuzz: Fuzz the `Config::generate_address_map` option This commit adds fuzz configuration where `generate_address_map` is either enabled or disabled, unlike how it's always enabled for fuzzing today. * Remove unnecessary handling of relocations This commit removes a number of bits and pieces all related to handling relocations in JIT code generated by Wasmtime. None of this is necessary nowadays that the "old backend" has been removed (quite some time ago) and relocations are no longer expected to be in the JIT code at all. Additionally with the minimum x86_64 features required to run wasm code it should be expected that no libcalls are required either for Wasmtime-based JIT code. --- crates/cranelift/Cargo.toml | 2 +- crates/cranelift/src/compiler.rs | 30 +++-------- crates/cranelift/src/obj.rs | 86 +++++++----------------------- crates/fuzzing/src/generators.rs | 15 +++++- crates/jit/src/code_memory.rs | 18 +++---- crates/jit/src/lib.rs | 1 - crates/jit/src/link.rs | 89 -------------------------------- 7 files changed, 48 insertions(+), 193 deletions(-) delete mode 100644 crates/jit/src/link.rs diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml index 4f588334d5..a337bcfe3f 100644 --- a/crates/cranelift/Cargo.toml +++ b/crates/cranelift/Cargo.toml @@ -8,7 +8,7 @@ repository = "https://github.com/bytecodealliance/wasmtime" documentation = "https://docs.rs/wasmtime-cranelift/" categories = ["wasm"] keywords = ["webassembly", "wasm"] -edition = "2018" +edition = "2021" [dependencies] anyhow = "1.0" diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index fd41e2e663..7463c2fa45 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -547,36 +547,22 @@ impl Compiler { isa: &dyn TargetIsa, ) -> Result { let mut code_buf = Vec::new(); - let mut relocs = Vec::new(); context .compile_and_emit(isa, &mut code_buf) .map_err(|error| CompileError::Codegen(pretty_error(&context.func, error)))?; - for &MachReloc { - offset, - srcloc: _, - kind, - ref name, - addend, - } in context + // Processing relocations isn't the hardest thing in the world here but + // no trampoline should currently generate a relocation, so assert that + // they're all empty and if this ever trips in the future then handling + // will need to be added here to ensure they make their way into the + // `CompiledFunction` below. + assert!(context .mach_compile_result .as_ref() .unwrap() .buffer .relocs() - { - let reloc_target = if let ir::ExternalName::LibCall(libcall) = *name { - RelocationTarget::LibCall(libcall) - } else { - panic!("unrecognized external name") - }; - relocs.push(Relocation { - reloc: kind, - reloc_target, - offset, - addend, - }); - } + .is_empty()); let unwind_info = context .create_unwind_info(isa) @@ -585,7 +571,7 @@ impl Compiler { Ok(CompiledFunction { body: code_buf, unwind_info, - relocations: relocs, + relocations: Vec::new(), stack_slots: Default::default(), value_labels_ranges: Default::default(), info: Default::default(), diff --git a/crates/cranelift/src/obj.rs b/crates/cranelift/src/obj.rs index 62c7ed9ba0..63ae67c38e 100644 --- a/crates/cranelift/src/obj.rs +++ b/crates/cranelift/src/obj.rs @@ -17,7 +17,6 @@ use crate::debug::{DwarfSection, DwarfSectionRelocTarget}; use crate::{CompiledFunction, Relocation, RelocationTarget}; use anyhow::Result; use cranelift_codegen::binemit::Reloc; -use cranelift_codegen::ir::LibCall; use cranelift_codegen::isa::{ unwind::{systemv, UnwindInfo}, TargetIsa, @@ -69,30 +68,6 @@ macro_rules! for_each_libcall { }; } -fn write_libcall_symbols(obj: &mut Object) -> HashMap { - let mut libcalls = HashMap::new(); - macro_rules! add_libcall_symbol { - [$(($libcall:ident, $export:ident)),*] => {{ - $( - let symbol_id = obj.add_symbol(Symbol { - name: stringify!($export).as_bytes().to_vec(), - value: 0, - size: 0, - kind: SymbolKind::Text, - scope: SymbolScope::Linkage, - weak: true, - section: SymbolSection::Undefined, - flags: SymbolFlags::None, - }); - libcalls.insert(LibCall::$libcall, symbol_id); - )+ - }}; - } - for_each_libcall!(add_libcall_symbol); - - libcalls -} - /// A helper structure used to assemble the final text section of an exectuable, /// plus unwinding information and other related details. /// @@ -110,10 +85,6 @@ pub struct ObjectBuilder<'a> { /// The WebAssembly module we're generating code for. module: &'a Module, - /// Map of injected symbols for all possible libcalls, used whenever there's - /// a relocation against a libcall. - libcalls: HashMap, - windows_unwind_info_id: Option, /// Packed form of windows unwind tables which, if present, will get emitted @@ -190,15 +161,12 @@ impl<'a> ObjectBuilder<'a> { func_symbols.push(symbol_id); } - let libcalls = write_libcall_symbols(obj); - Self { isa, obj, module, text_section, func_symbols, - libcalls, windows_unwind_info_id: None, windows_unwind_info: Vec::new(), systemv_unwind_info_id: None, @@ -266,7 +234,7 @@ impl<'a> ObjectBuilder<'a> { } for r in func.relocations.iter() { - let (symbol, symbol_offset) = match r.reloc_target { + match r.reloc_target { // Relocations against user-defined functions means that this is // a relocation against a module-local function, typically a // call between functions. The `text` field is given priority to @@ -284,43 +252,27 @@ impl<'a> ObjectBuilder<'a> { continue; } - // FIXME(#3009) once the old backend is removed all - // inter-function relocations should be handled by - // `self.text`. This can become `unreachable!()` in that - // case. - self.relocations.push((r, off)); - continue; + // At this time it's expected that all relocations are + // handled by `text.resolve_reloc`, and anything that isn't + // handled is a bug in `text.resolve_reloc` or something + // transitively there. If truly necessary, though, then this + // loop could also be updated to forward the relocation to + // the final object file as well. + panic!( + "unresolved relocation could not be procesed against \ + {index:?}: {r:?}" + ); } - // These relocations, unlike against user funcs above, typically - // involve absolute addresses and need to get resolved at load - // time. These are persisted immediately into the object file. - // - // FIXME: these, like user-defined-functions, should probably - // use relative jumps and avoid absolute relocations. They don't - // seem too common though so aren't necessarily that important - // to optimize. - RelocationTarget::LibCall(call) => (self.libcalls[&call], 0), + // At this time it's not expected that any libcall relocations + // are generated. Ideally we don't want relocations against + // libcalls anyway as libcalls should go through indirect + // `VMContext` tables to avoid needing to apply relocations at + // module-load time as well. + RelocationTarget::LibCall(call) => { + unimplemented!("cannot generate relocation against libcall {call:?}"); + } }; - let (kind, encoding, size) = match r.reloc { - Reloc::Abs4 => (RelocationKind::Absolute, RelocationEncoding::Generic, 32), - Reloc::Abs8 => (RelocationKind::Absolute, RelocationEncoding::Generic, 64), - - other => unimplemented!("Unimplemented relocation {:?}", other), - }; - self.obj - .add_relocation( - self.text_section, - ObjectRelocation { - offset: off + r.offset as u64, - size, - kind, - encoding, - symbol, - addend: r.addend.wrapping_add(symbol_offset as i64), - }, - ) - .unwrap(); } (symbol_id, off..off + body_len) } diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 04c6b5f754..c91cf1f9b8 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -211,6 +211,8 @@ pub struct WasmtimeConfig { /// Configuration for the instance allocation strategy to use. pub strategy: InstanceAllocationStrategy, codegen: CodegenSettings, + padding_between_functions: Option, + generate_address_map: bool, } /// Configuration for linear memories in Wasmtime. @@ -393,7 +395,8 @@ impl Config { 16 << 20, self.wasmtime.memory_guaranteed_dense_image_size, )) - .allocation_strategy(self.wasmtime.strategy.to_wasmtime()); + .allocation_strategy(self.wasmtime.strategy.to_wasmtime()) + .generate_address_map(self.wasmtime.generate_address_map); self.wasmtime.codegen.configure(&mut cfg); @@ -418,6 +421,16 @@ impl Config { } } + if let Some(pad) = self.wasmtime.padding_between_functions { + unsafe { + cfg.cranelift_flag_set( + "wasmtime_linkopt_padding_between_functions", + &pad.to_string(), + ) + .unwrap(); + } + } + match &self.wasmtime.memory_config { MemoryConfig::Normal(memory_config) => { cfg.static_memory_maximum_size( diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index c4797c4619..5dfe1a1115 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -148,18 +148,12 @@ impl CodeMemory { std::slice::from_raw_parts_mut(ret.text.as_ptr() as *mut u8, ret.text.len()); let text_offset = ret.text.as_ptr() as usize - ret.mmap.as_ptr() as usize; let text_range = text_offset..text_offset + text_mut.len(); - let mut text_section_readwrite = false; - for (offset, r) in text.relocations() { - // If the text section was mapped at readonly we need to make it - // briefly read/write here as we apply relocations. - if !text_section_readwrite && self.mmap.is_readonly() { - self.mmap - .make_writable(text_range.clone()) - .expect("unable to make memory writable"); - text_section_readwrite = true; - } - crate::link::apply_reloc(&ret.obj, text_mut, offset, r); - } + + // Double-check there are no relocations in the text section. At + // this time relocations are not expected at all from loaded code + // since everything should be resolved at compile time. Handling + // must be added here, though, if relocations pop up. + assert!(text.relocations().count() == 0); // Switch the executable portion from read/write to // read/execute, notably not using read/write/execute to prevent diff --git a/crates/jit/src/lib.rs b/crates/jit/src/lib.rs index 4152f56c7a..07d93c0b2b 100644 --- a/crates/jit/src/lib.rs +++ b/crates/jit/src/lib.rs @@ -24,7 +24,6 @@ mod code_memory; mod debug; mod demangling; mod instantiate; -mod link; mod profiling; mod unwind; diff --git a/crates/jit/src/link.rs b/crates/jit/src/link.rs deleted file mode 100644 index 08f0cbf735..0000000000 --- a/crates/jit/src/link.rs +++ /dev/null @@ -1,89 +0,0 @@ -//! Linking for JIT-compiled code. - -use object::read::{Object, Relocation, RelocationTarget}; -use object::{File, NativeEndian as NE, ObjectSymbol, RelocationEncoding, RelocationKind}; -use std::convert::TryFrom; -use wasmtime_runtime::libcalls; - -type I32 = object::I32Bytes; -type U64 = object::U64Bytes; - -/// Applies the relocation `r` at `offset` within `code`, according to the -/// symbols found in `obj`. -/// -/// This method is used at runtime to resolve relocations in ELF images, -/// typically with respect to where the memory was placed in the final address -/// in memory. -pub fn apply_reloc(obj: &File, code: &mut [u8], offset: u64, r: Relocation) { - let target_func_address: usize = match r.target() { - RelocationTarget::Symbol(i) => { - // Processing relocation target is a named symbols that is compiled - // wasm function or runtime libcall. - let sym = obj.symbol_by_index(i).unwrap(); - if sym.is_local() { - &code[sym.address() as usize] as *const u8 as usize - } else { - match sym.name() { - Ok(name) => { - if let Some(addr) = to_libcall_address(name) { - addr - } else { - panic!("unknown function to link: {}", name); - } - } - Err(_) => panic!("unexpected relocation target: not a symbol"), - } - } - } - _ => panic!("unexpected relocation target: not a symbol"), - }; - - match (r.kind(), r.encoding(), r.size()) { - (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { - let reloc_address = reloc_address::(code, offset); - let reloc_abs = (target_func_address as u64) - .checked_add(r.addend() as u64) - .unwrap(); - reloc_address.set(NE, reloc_abs); - } - - // FIXME(#3009) after the old backend is removed this won't ever show up - // again so it can be removed. - (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { - let reloc_address = reloc_address::(code, offset); - let val = (target_func_address as i64) - .wrapping_add(r.addend()) - .wrapping_sub(reloc_address as *const _ as i64); - reloc_address.set(NE, i32::try_from(val).expect("relocation out-of-bounds")); - } - - other => panic!("unsupported reloc kind: {:?}", other), - } -} - -fn reloc_address(code: &mut [u8], offset: u64) -> &mut T { - let (reloc, _rest) = usize::try_from(offset) - .ok() - .and_then(move |offset| code.get_mut(offset..)) - .and_then(|range| object::from_bytes_mut(range).ok()) - .expect("invalid reloc offset"); - reloc -} - -fn to_libcall_address(name: &str) -> Option { - use self::libcalls::*; - use wasmtime_environ::for_each_libcall; - macro_rules! add_libcall_symbol { - [$(($libcall:ident, $export:ident)),*] => { - Some(match name { - $( - stringify!($export) => $export as usize, - )+ - _ => { - return None; - } - }) - }; - } - for_each_libcall!(add_libcall_symbol) -}