582 lines
20 KiB
Rust
582 lines
20 KiB
Rust
//! Defines `SimpleJITModule`.
|
|
|
|
use crate::{compiled_blob::CompiledBlob, memory::Memory};
|
|
use cranelift_codegen::isa::TargetIsa;
|
|
use cranelift_codegen::settings::Configurable;
|
|
use cranelift_codegen::{self, ir, settings};
|
|
use cranelift_codegen::{
|
|
binemit::{self, Addend, CodeInfo, CodeOffset, Reloc, RelocSink, TrapSink},
|
|
CodegenError,
|
|
};
|
|
use cranelift_entity::SecondaryMap;
|
|
use cranelift_module::{
|
|
DataContext, DataDescription, DataId, FuncId, Init, Linkage, Module, ModuleCompiledFunction,
|
|
ModuleDeclarations, ModuleError, ModuleResult, RelocRecord,
|
|
};
|
|
use cranelift_native;
|
|
#[cfg(not(windows))]
|
|
use libc;
|
|
use log::info;
|
|
use std::collections::HashMap;
|
|
use std::convert::TryInto;
|
|
use std::ffi::CString;
|
|
use std::io::Write;
|
|
use std::ptr;
|
|
use target_lexicon::PointerWidth;
|
|
#[cfg(windows)]
|
|
use winapi;
|
|
|
|
/// Alignment requested from the code allocator for every function body.
const EXECUTABLE_DATA_ALIGNMENT: u64 = 0x10;
/// Alignment used for writable data objects that do not specify their own.
const WRITABLE_DATA_ALIGNMENT: u64 = 0x8;
/// Alignment used for read-only data objects that do not specify their own.
const READONLY_DATA_ALIGNMENT: u64 = 0x1;
|
|
|
|
/// A builder for `SimpleJITModule`.
|
|
pub struct SimpleJITBuilder {
|
|
isa: Box<dyn TargetIsa>,
|
|
symbols: HashMap<String, *const u8>,
|
|
libcall_names: Box<dyn Fn(ir::LibCall) -> String + Send + Sync>,
|
|
}
|
|
|
|
impl SimpleJITBuilder {
|
|
/// Create a new `SimpleJITBuilder`.
|
|
///
|
|
/// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
|
|
/// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain
|
|
/// floating point instructions, and for stack probes. If you don't know what to use for this
|
|
/// argument, use `cranelift_module::default_libcall_names()`.
|
|
pub fn new(libcall_names: Box<dyn Fn(ir::LibCall) -> String + Send + Sync>) -> Self {
|
|
let mut flag_builder = settings::builder();
|
|
// On at least AArch64, "colocated" calls use shorter-range relocations,
|
|
// which might not reach all definitions; we can't handle that here, so
|
|
// we require long-range relocation types.
|
|
flag_builder.set("use_colocated_libcalls", "false").unwrap();
|
|
let isa_builder = cranelift_native::builder().unwrap_or_else(|msg| {
|
|
panic!("host machine is not supported: {}", msg);
|
|
});
|
|
let isa = isa_builder.finish(settings::Flags::new(flag_builder));
|
|
Self::with_isa(isa, libcall_names)
|
|
}
|
|
|
|
/// Create a new `SimpleJITBuilder` with an arbitrary target. This is mainly
|
|
/// useful for testing.
|
|
///
|
|
/// SimpleJIT requires a `TargetIsa` configured for non-PIC.
|
|
///
|
|
/// To create a `SimpleJITBuilder` for native use, use the `new` constructor
|
|
/// instead.
|
|
///
|
|
/// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
|
|
/// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain
|
|
/// floating point instructions, and for stack probes. If you don't know what to use for this
|
|
/// argument, use `cranelift_module::default_libcall_names()`.
|
|
pub fn with_isa(
|
|
isa: Box<dyn TargetIsa>,
|
|
libcall_names: Box<dyn Fn(ir::LibCall) -> String + Send + Sync>,
|
|
) -> Self {
|
|
debug_assert!(!isa.flags().is_pic(), "SimpleJIT requires non-PIC code");
|
|
let symbols = HashMap::new();
|
|
Self {
|
|
isa,
|
|
symbols,
|
|
libcall_names,
|
|
}
|
|
}
|
|
|
|
/// Define a symbol in the internal symbol table.
|
|
///
|
|
/// The JIT will use the symbol table to resolve names that are declared,
|
|
/// but not defined, in the module being compiled. A common example is
|
|
/// external functions. With this method, functions and data can be exposed
|
|
/// to the code being compiled which are defined by the host.
|
|
///
|
|
/// If a symbol is defined more than once, the most recent definition will
|
|
/// be retained.
|
|
///
|
|
/// If the JIT fails to find a symbol in its internal table, it will fall
|
|
/// back to a platform-specific search (this typically involves searching
|
|
/// the current process for public symbols, followed by searching the
|
|
/// platform's C runtime).
|
|
pub fn symbol<K>(&mut self, name: K, ptr: *const u8) -> &Self
|
|
where
|
|
K: Into<String>,
|
|
{
|
|
self.symbols.insert(name.into(), ptr);
|
|
self
|
|
}
|
|
|
|
/// Define multiple symbols in the internal symbol table.
|
|
///
|
|
/// Using this is equivalent to calling `symbol` on each element.
|
|
pub fn symbols<It, K>(&mut self, symbols: It) -> &Self
|
|
where
|
|
It: IntoIterator<Item = (K, *const u8)>,
|
|
K: Into<String>,
|
|
{
|
|
for (name, ptr) in symbols {
|
|
self.symbols.insert(name.into(), ptr);
|
|
}
|
|
self
|
|
}
|
|
}
|
|
|
|
/// A `SimpleJITModule` implements `Module` and emits code and data into memory where it can be
|
|
/// directly called and accessed.
|
|
///
|
|
/// See the `SimpleJITBuilder` for a convenient way to construct `SimpleJITModule` instances.
|
|
pub struct SimpleJITModule {
|
|
isa: Box<dyn TargetIsa>,
|
|
symbols: HashMap<String, *const u8>,
|
|
libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
|
|
memory: MemoryHandle,
|
|
declarations: ModuleDeclarations,
|
|
compiled_functions: SecondaryMap<FuncId, Option<CompiledBlob>>,
|
|
compiled_data_objects: SecondaryMap<DataId, Option<CompiledBlob>>,
|
|
functions_to_finalize: Vec<FuncId>,
|
|
data_objects_to_finalize: Vec<DataId>,
|
|
}
|
|
|
|
/// A handle to allow freeing memory allocated by the `Module`.
|
|
struct MemoryHandle {
|
|
code: Memory,
|
|
readonly: Memory,
|
|
writable: Memory,
|
|
}
|
|
|
|
impl SimpleJITModule {
|
|
/// Free memory allocated for code and data segments of compiled functions.
|
|
///
|
|
/// # Safety
|
|
///
|
|
/// Because this function invalidates any pointers retrived from the
|
|
/// corresponding module, it should only be used when none of the functions
|
|
/// from that module are currently executing and none of the `fn` pointers
|
|
/// are called afterwards.
|
|
pub unsafe fn free_memory(&mut self) {
|
|
self.memory.code.free_memory();
|
|
self.memory.readonly.free_memory();
|
|
self.memory.writable.free_memory();
|
|
}
|
|
|
|
fn lookup_symbol(&self, name: &str) -> Option<*const u8> {
|
|
self.symbols
|
|
.get(name)
|
|
.copied()
|
|
.or_else(|| lookup_with_dlsym(name))
|
|
}
|
|
|
|
fn get_definition(&self, name: &ir::ExternalName) -> *const u8 {
|
|
match *name {
|
|
ir::ExternalName::User { .. } => {
|
|
let (name, linkage) = if ModuleDeclarations::is_function(name) {
|
|
let func_id = FuncId::from_name(name);
|
|
match &self.compiled_functions[func_id] {
|
|
Some(compiled) => return compiled.ptr,
|
|
None => {
|
|
let decl = self.declarations.get_function_decl(func_id);
|
|
(&decl.name, decl.linkage)
|
|
}
|
|
}
|
|
} else {
|
|
let data_id = DataId::from_name(name);
|
|
match &self.compiled_data_objects[data_id] {
|
|
Some(compiled) => return compiled.ptr,
|
|
None => {
|
|
let decl = self.declarations.get_data_decl(data_id);
|
|
(&decl.name, decl.linkage)
|
|
}
|
|
}
|
|
};
|
|
if let Some(ptr) = self.lookup_symbol(&name) {
|
|
ptr
|
|
} else if linkage == Linkage::Preemptible {
|
|
0 as *const u8
|
|
} else {
|
|
panic!("can't resolve symbol {}", name);
|
|
}
|
|
}
|
|
ir::ExternalName::LibCall(ref libcall) => {
|
|
let sym = (self.libcall_names)(*libcall);
|
|
self.lookup_symbol(&sym)
|
|
.unwrap_or_else(|| panic!("can't resolve libcall {}", sym))
|
|
}
|
|
_ => panic!("invalid ExternalName {}", name),
|
|
}
|
|
}
|
|
|
|
/// Returns the address of a finalized function.
|
|
pub fn get_finalized_function(&self, func_id: FuncId) -> *const u8 {
|
|
let info = &self.compiled_functions[func_id];
|
|
debug_assert!(
|
|
!self.functions_to_finalize.iter().any(|x| *x == func_id),
|
|
"function not yet finalized"
|
|
);
|
|
info.as_ref()
|
|
.expect("function must be compiled before it can be finalized")
|
|
.ptr
|
|
}
|
|
|
|
/// Returns the address and size of a finalized data object.
|
|
pub fn get_finalized_data(&self, data_id: DataId) -> (*const u8, usize) {
|
|
let info = &self.compiled_data_objects[data_id];
|
|
debug_assert!(
|
|
!self.data_objects_to_finalize.iter().any(|x| *x == data_id),
|
|
"data object not yet finalized"
|
|
);
|
|
let compiled = info
|
|
.as_ref()
|
|
.expect("data object must be compiled before it can be finalized");
|
|
|
|
(compiled.ptr, compiled.size)
|
|
}
|
|
|
|
fn record_function_for_perf(&self, ptr: *mut u8, size: usize, name: &str) {
|
|
// The Linux perf tool supports JIT code via a /tmp/perf-$PID.map file,
|
|
// which contains memory regions and their associated names. If we
|
|
// are profiling with perf and saving binaries to PERF_BUILDID_DIR
|
|
// for post-profile analysis, write information about each function
|
|
// we define.
|
|
if cfg!(target_os = "linux") && ::std::env::var_os("PERF_BUILDID_DIR").is_some() {
|
|
let mut map_file = ::std::fs::OpenOptions::new()
|
|
.create(true)
|
|
.append(true)
|
|
.open(format!("/tmp/perf-{}.map", ::std::process::id()))
|
|
.unwrap();
|
|
|
|
let _ = writeln!(map_file, "{:x} {:x} {}", ptr as usize, size, name);
|
|
}
|
|
}
|
|
|
|
/// Finalize all functions and data objects that are defined but not yet finalized.
|
|
/// All symbols referenced in their bodies that are declared as needing a definition
|
|
/// must be defined by this point.
|
|
///
|
|
/// Use `get_finalized_function` and `get_finalized_data` to obtain the final
|
|
/// artifacts.
|
|
pub fn finalize_definitions(&mut self) {
|
|
for func in std::mem::take(&mut self.functions_to_finalize) {
|
|
let decl = self.declarations.get_function_decl(func);
|
|
debug_assert!(decl.linkage.is_definable());
|
|
let func = self.compiled_functions[func]
|
|
.as_ref()
|
|
.expect("function must be compiled before it can be finalized");
|
|
func.perform_relocations(|name| self.get_definition(name));
|
|
}
|
|
for data in std::mem::take(&mut self.data_objects_to_finalize) {
|
|
let decl = self.declarations.get_data_decl(data);
|
|
debug_assert!(decl.linkage.is_definable());
|
|
let data = self.compiled_data_objects[data]
|
|
.as_ref()
|
|
.expect("data object must be compiled before it can be finalized");
|
|
data.perform_relocations(|name| self.get_definition(name));
|
|
}
|
|
|
|
// Now that we're done patching, prepare the memory for execution!
|
|
self.memory.readonly.set_readonly();
|
|
self.memory.code.set_readable_and_executable();
|
|
}
|
|
|
|
/// Create a new `SimpleJITModule`.
|
|
pub fn new(builder: SimpleJITBuilder) -> Self {
|
|
let memory = MemoryHandle {
|
|
code: Memory::new(),
|
|
readonly: Memory::new(),
|
|
writable: Memory::new(),
|
|
};
|
|
|
|
Self {
|
|
isa: builder.isa,
|
|
symbols: builder.symbols,
|
|
libcall_names: builder.libcall_names,
|
|
memory,
|
|
declarations: ModuleDeclarations::default(),
|
|
compiled_functions: SecondaryMap::new(),
|
|
compiled_data_objects: SecondaryMap::new(),
|
|
functions_to_finalize: Vec::new(),
|
|
data_objects_to_finalize: Vec::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'simple_jit_backend> Module for SimpleJITModule {
|
|
fn isa(&self) -> &dyn TargetIsa {
|
|
&*self.isa
|
|
}
|
|
|
|
fn declarations(&self) -> &ModuleDeclarations {
|
|
&self.declarations
|
|
}
|
|
|
|
fn declare_function(
|
|
&mut self,
|
|
name: &str,
|
|
linkage: Linkage,
|
|
signature: &ir::Signature,
|
|
) -> ModuleResult<FuncId> {
|
|
let (id, _decl) = self
|
|
.declarations
|
|
.declare_function(name, linkage, signature)?;
|
|
Ok(id)
|
|
}
|
|
|
|
fn declare_data(
|
|
&mut self,
|
|
name: &str,
|
|
linkage: Linkage,
|
|
writable: bool,
|
|
tls: bool,
|
|
) -> ModuleResult<DataId> {
|
|
assert!(!tls, "SimpleJIT doesn't yet support TLS");
|
|
let (id, _decl) = self
|
|
.declarations
|
|
.declare_data(name, linkage, writable, tls)?;
|
|
Ok(id)
|
|
}
|
|
|
|
fn define_function<TS>(
|
|
&mut self,
|
|
id: FuncId,
|
|
ctx: &mut cranelift_codegen::Context,
|
|
trap_sink: &mut TS,
|
|
) -> ModuleResult<ModuleCompiledFunction>
|
|
where
|
|
TS: TrapSink,
|
|
{
|
|
info!("defining function {}: {}", id, ctx.func.display(self.isa()));
|
|
let CodeInfo {
|
|
total_size: code_size,
|
|
..
|
|
} = ctx.compile(self.isa())?;
|
|
|
|
let decl = self.declarations.get_function_decl(id);
|
|
if !decl.linkage.is_definable() {
|
|
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
|
|
}
|
|
|
|
if !self.compiled_functions[id].is_none() {
|
|
return Err(ModuleError::DuplicateDefinition(decl.name.to_owned()));
|
|
}
|
|
|
|
let size = code_size as usize;
|
|
let ptr = self
|
|
.memory
|
|
.code
|
|
.allocate(size, EXECUTABLE_DATA_ALIGNMENT)
|
|
.expect("TODO: handle OOM etc.");
|
|
|
|
let mut reloc_sink = SimpleJITRelocSink::default();
|
|
let mut stack_map_sink = binemit::NullStackMapSink {};
|
|
unsafe {
|
|
ctx.emit_to_memory(
|
|
&*self.isa,
|
|
ptr,
|
|
&mut reloc_sink,
|
|
trap_sink,
|
|
&mut stack_map_sink,
|
|
)
|
|
};
|
|
|
|
self.record_function_for_perf(ptr, size, &decl.name);
|
|
self.compiled_functions[id] = Some(CompiledBlob {
|
|
ptr,
|
|
size,
|
|
relocs: reloc_sink.relocs,
|
|
});
|
|
self.functions_to_finalize.push(id);
|
|
|
|
Ok(ModuleCompiledFunction { size: code_size })
|
|
}
|
|
|
|
fn define_function_bytes(
|
|
&mut self,
|
|
id: FuncId,
|
|
bytes: &[u8],
|
|
relocs: &[RelocRecord],
|
|
) -> ModuleResult<ModuleCompiledFunction> {
|
|
info!("defining function {} with bytes", id);
|
|
let total_size: u32 = match bytes.len().try_into() {
|
|
Ok(total_size) => total_size,
|
|
_ => Err(CodegenError::CodeTooLarge)?,
|
|
};
|
|
|
|
let decl = self.declarations.get_function_decl(id);
|
|
if !decl.linkage.is_definable() {
|
|
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
|
|
}
|
|
|
|
if !self.compiled_functions[id].is_none() {
|
|
return Err(ModuleError::DuplicateDefinition(decl.name.to_owned()));
|
|
}
|
|
|
|
let size = bytes.len();
|
|
let ptr = self
|
|
.memory
|
|
.code
|
|
.allocate(size, EXECUTABLE_DATA_ALIGNMENT)
|
|
.expect("TODO: handle OOM etc.");
|
|
|
|
unsafe {
|
|
ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, size);
|
|
}
|
|
|
|
self.record_function_for_perf(ptr, size, &decl.name);
|
|
self.compiled_functions[id] = Some(CompiledBlob {
|
|
ptr,
|
|
size,
|
|
relocs: relocs.to_vec(),
|
|
});
|
|
self.functions_to_finalize.push(id);
|
|
|
|
Ok(ModuleCompiledFunction { size: total_size })
|
|
}
|
|
|
|
fn define_data(&mut self, id: DataId, data: &DataContext) -> ModuleResult<()> {
|
|
let decl = self.declarations.get_data_decl(id);
|
|
if !decl.linkage.is_definable() {
|
|
return Err(ModuleError::InvalidImportDefinition(decl.name.clone()));
|
|
}
|
|
|
|
if !self.compiled_data_objects[id].is_none() {
|
|
return Err(ModuleError::DuplicateDefinition(decl.name.to_owned()));
|
|
}
|
|
|
|
assert!(!decl.tls, "SimpleJIT doesn't yet support TLS");
|
|
|
|
let &DataDescription {
|
|
ref init,
|
|
function_decls: _,
|
|
data_decls: _,
|
|
function_relocs: _,
|
|
data_relocs: _,
|
|
custom_segment_section: _,
|
|
align,
|
|
} = data.description();
|
|
|
|
let size = init.size();
|
|
let ptr = if decl.writable {
|
|
self.memory
|
|
.writable
|
|
.allocate(size, align.unwrap_or(WRITABLE_DATA_ALIGNMENT))
|
|
.expect("TODO: handle OOM etc.")
|
|
} else {
|
|
self.memory
|
|
.readonly
|
|
.allocate(size, align.unwrap_or(READONLY_DATA_ALIGNMENT))
|
|
.expect("TODO: handle OOM etc.")
|
|
};
|
|
|
|
match *init {
|
|
Init::Uninitialized => {
|
|
panic!("data is not initialized yet");
|
|
}
|
|
Init::Zeros { .. } => {
|
|
unsafe { ptr::write_bytes(ptr, 0, size) };
|
|
}
|
|
Init::Bytes { ref contents } => {
|
|
let src = contents.as_ptr();
|
|
unsafe { ptr::copy_nonoverlapping(src, ptr, size) };
|
|
}
|
|
}
|
|
|
|
let pointer_reloc = match self.isa.triple().pointer_width().unwrap() {
|
|
PointerWidth::U16 => panic!(),
|
|
PointerWidth::U32 => Reloc::Abs4,
|
|
PointerWidth::U64 => Reloc::Abs8,
|
|
};
|
|
let relocs = data
|
|
.description()
|
|
.all_relocs(pointer_reloc)
|
|
.collect::<Vec<_>>();
|
|
|
|
self.compiled_data_objects[id] = Some(CompiledBlob { ptr, size, relocs });
|
|
self.data_objects_to_finalize.push(id);
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[cfg(not(windows))]
|
|
fn lookup_with_dlsym(name: &str) -> Option<*const u8> {
|
|
let c_str = CString::new(name).unwrap();
|
|
let c_str_ptr = c_str.as_ptr();
|
|
let sym = unsafe { libc::dlsym(libc::RTLD_DEFAULT, c_str_ptr) };
|
|
if sym.is_null() {
|
|
None
|
|
} else {
|
|
Some(sym as *const u8)
|
|
}
|
|
}
|
|
|
|
/// Look `name` up with `GetProcAddress`, first using a null module handle and
/// then the handle of `msvcrt.dll`.
///
/// Returns `None` when neither lookup yields an address, or when `name`
/// contains an interior NUL byte and therefore cannot be passed as a C string.
#[cfg(windows)]
fn lookup_with_dlsym(name: &str) -> Option<*const u8> {
    const MSVCRT_DLL: &[u8] = b"msvcrt.dll\0";

    // Report an unrepresentable name as "not found" rather than panicking.
    let c_str = CString::new(name).ok()?;
    let c_str_ptr = c_str.as_ptr();

    // SAFETY: both strings handed to the Win32 API are NUL-terminated and
    // stay alive for the duration of the calls.
    unsafe {
        let handles = [
            // try to find the searched symbol in the currently running executable
            ptr::null_mut(),
            // try to find the searched symbol in local c runtime
            winapi::um::libloaderapi::GetModuleHandleA(MSVCRT_DLL.as_ptr() as *const i8),
        ];

        for handle in &handles {
            let addr = winapi::um::libloaderapi::GetProcAddress(*handle, c_str_ptr);
            if !addr.is_null() {
                return Some(addr as *const u8);
            }
        }

        None
    }
}
|
|
|
|
#[derive(Default)]
|
|
struct SimpleJITRelocSink {
|
|
relocs: Vec<RelocRecord>,
|
|
}
|
|
|
|
impl RelocSink for SimpleJITRelocSink {
|
|
fn reloc_external(
|
|
&mut self,
|
|
offset: CodeOffset,
|
|
_srcloc: ir::SourceLoc,
|
|
reloc: Reloc,
|
|
name: &ir::ExternalName,
|
|
addend: Addend,
|
|
) {
|
|
self.relocs.push(RelocRecord {
|
|
offset,
|
|
reloc,
|
|
name: name.clone(),
|
|
addend,
|
|
});
|
|
}
|
|
|
|
fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) {
|
|
match reloc {
|
|
Reloc::X86PCRelRodata4 => {
|
|
// Not necessary to record this unless we are going to split apart code and its
|
|
// jumptbl/rodata.
|
|
}
|
|
_ => {
|
|
panic!("Unhandled reloc");
|
|
}
|
|
}
|
|
}
|
|
|
|
fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _constant: ir::ConstantOffset) {
|
|
match reloc {
|
|
Reloc::X86PCRelRodata4 => {
|
|
// Not necessary to record this unless we are going to split apart code and its
|
|
// jumptbl/rodata.
|
|
}
|
|
_ => {
|
|
panic!("Unhandled reloc");
|
|
}
|
|
}
|
|
}
|
|
}
|