//! Defines `SimpleJITModule`. use crate::{compiled_blob::CompiledBlob, memory::Memory}; use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::settings::Configurable; use cranelift_codegen::{self, ir, settings}; use cranelift_codegen::{ binemit::{self, Addend, CodeInfo, CodeOffset, Reloc, RelocSink, TrapSink}, CodegenError, }; use cranelift_entity::SecondaryMap; use cranelift_module::{ DataContext, DataDescription, DataId, FuncId, Init, Linkage, Module, ModuleCompiledFunction, ModuleDeclarations, ModuleError, ModuleResult, RelocRecord, }; use cranelift_native; #[cfg(not(windows))] use libc; use log::info; use std::collections::HashMap; use std::convert::TryInto; use std::ffi::CString; use std::io::Write; use std::ptr; use target_lexicon::PointerWidth; #[cfg(windows)] use winapi; const EXECUTABLE_DATA_ALIGNMENT: u64 = 0x10; const WRITABLE_DATA_ALIGNMENT: u64 = 0x8; const READONLY_DATA_ALIGNMENT: u64 = 0x1; /// A builder for `SimpleJITModule`. pub struct SimpleJITBuilder { isa: Box, symbols: HashMap, libcall_names: Box String + Send + Sync>, } impl SimpleJITBuilder { /// Create a new `SimpleJITBuilder`. /// /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall` /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain /// floating point instructions, and for stack probes. If you don't know what to use for this /// argument, use `cranelift_module::default_libcall_names()`. pub fn new(libcall_names: Box String + Send + Sync>) -> Self { let mut flag_builder = settings::builder(); // On at least AArch64, "colocated" calls use shorter-range relocations, // which might not reach all definitions; we can't handle that here, so // we require long-range relocation types. flag_builder.set("use_colocated_libcalls", "false").unwrap(); let isa_builder = cranelift_native::builder().unwrap_or_else(|msg| { panic!("host machine is not supported: {}", msg); }); let isa = isa_builder.finish(settings::Flags::new(flag_builder)); Self::with_isa(isa, libcall_names) } /// Create a new `SimpleJITBuilder` with an arbitrary target. This is mainly /// useful for testing. /// /// SimpleJIT requires a `TargetIsa` configured for non-PIC. /// /// To create a `SimpleJITBuilder` for native use, use the `new` constructor /// instead. /// /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall` /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain /// floating point instructions, and for stack probes. If you don't know what to use for this /// argument, use `cranelift_module::default_libcall_names()`. pub fn with_isa( isa: Box, libcall_names: Box String + Send + Sync>, ) -> Self { debug_assert!(!isa.flags().is_pic(), "SimpleJIT requires non-PIC code"); let symbols = HashMap::new(); Self { isa, symbols, libcall_names, } } /// Define a symbol in the internal symbol table. /// /// The JIT will use the symbol table to resolve names that are declared, /// but not defined, in the module being compiled. A common example is /// external functions. With this method, functions and data can be exposed /// to the code being compiled which are defined by the host. /// /// If a symbol is defined more than once, the most recent definition will /// be retained. /// /// If the JIT fails to find a symbol in its internal table, it will fall /// back to a platform-specific search (this typically involves searching /// the current process for public symbols, followed by searching the /// platform's C runtime). pub fn symbol(&mut self, name: K, ptr: *const u8) -> &Self where K: Into, { self.symbols.insert(name.into(), ptr); self } /// Define multiple symbols in the internal symbol table. /// /// Using this is equivalent to calling `symbol` on each element. pub fn symbols(&mut self, symbols: It) -> &Self where It: IntoIterator, K: Into, { for (name, ptr) in symbols { self.symbols.insert(name.into(), ptr); } self } } /// A `SimpleJITModule` implements `Module` and emits code and data into memory where it can be /// directly called and accessed. /// /// See the `SimpleJITBuilder` for a convenient way to construct `SimpleJITModule` instances. pub struct SimpleJITModule { isa: Box, symbols: HashMap, libcall_names: Box String>, memory: MemoryHandle, declarations: ModuleDeclarations, functions: SecondaryMap>, data_objects: SecondaryMap>, functions_to_finalize: Vec, data_objects_to_finalize: Vec, } /// A handle to allow freeing memory allocated by the `Module`. struct MemoryHandle { code: Memory, readonly: Memory, writable: Memory, } impl SimpleJITModule { /// Free memory allocated for code and data segments of compiled functions. /// /// # Safety /// /// Because this function invalidates any pointers retrived from the /// corresponding module, it should only be used when none of the functions /// from that module are currently executing and none of the `fn` pointers /// are called afterwards. pub unsafe fn free_memory(&mut self) { self.memory.code.free_memory(); self.memory.readonly.free_memory(); self.memory.writable.free_memory(); } fn lookup_symbol(&self, name: &str) -> Option<*const u8> { self.symbols .get(name) .copied() .or_else(|| lookup_with_dlsym(name)) } fn get_definition(&self, name: &ir::ExternalName) -> *const u8 { match *name { ir::ExternalName::User { .. } => { let (name, linkage) = if self.declarations.is_function(name) { let func_id = self.declarations.get_function_id(name); match &self.functions[func_id] { Some(compiled) => return compiled.ptr, None => { let decl = self.declarations.get_function_decl(func_id); (&decl.name, decl.linkage) } } } else { let data_id = self.declarations.get_data_id(name); match &self.data_objects[data_id] { Some(compiled) => return compiled.ptr, None => { let decl = self.declarations.get_data_decl(data_id); (&decl.name, decl.linkage) } } }; if let Some(ptr) = self.lookup_symbol(&name) { ptr } else if linkage == Linkage::Preemptible { 0 as *const u8 } else { panic!("can't resolve symbol {}", name); } } ir::ExternalName::LibCall(ref libcall) => { let sym = (self.libcall_names)(*libcall); self.lookup_symbol(&sym) .unwrap_or_else(|| panic!("can't resolve libcall {}", sym)) } _ => panic!("invalid ExternalName {}", name), } } /// Returns the address of a finalized function. pub fn get_finalized_function(&self, func_id: FuncId) -> *const u8 { let info = &self.functions[func_id]; debug_assert!( !self.functions_to_finalize.iter().any(|x| *x == func_id), "function not yet finalized" ); info.as_ref() .expect("function must be compiled before it can be finalized") .ptr } /// Returns the address and size of a finalized data object. pub fn get_finalized_data(&self, data_id: DataId) -> (*const u8, usize) { let info = &self.data_objects[data_id]; debug_assert!( !self.data_objects_to_finalize.iter().any(|x| *x == data_id), "data object not yet finalized" ); let compiled = info .as_ref() .expect("data object must be compiled before it can be finalized"); (compiled.ptr, compiled.size) } fn record_function_for_perf(&self, ptr: *mut u8, size: usize, name: &str) { // The Linux perf tool supports JIT code via a /tmp/perf-$PID.map file, // which contains memory regions and their associated names. If we // are profiling with perf and saving binaries to PERF_BUILDID_DIR // for post-profile analysis, write information about each function // we define. if cfg!(target_os = "linux") && ::std::env::var_os("PERF_BUILDID_DIR").is_some() { let mut map_file = ::std::fs::OpenOptions::new() .create(true) .append(true) .open(format!("/tmp/perf-{}.map", ::std::process::id())) .unwrap(); let _ = writeln!(map_file, "{:x} {:x} {}", ptr as usize, size, name); } } /// Finalize all functions and data objects that are defined but not yet finalized. /// All symbols referenced in their bodies that are declared as needing a definition /// must be defined by this point. /// /// Use `get_finalized_function` and `get_finalized_data` to obtain the final /// artifacts. pub fn finalize_definitions(&mut self) { for func in std::mem::take(&mut self.functions_to_finalize) { let decl = self.declarations.get_function_decl(func); debug_assert!(decl.linkage.is_definable()); let func = self.functions[func] .as_ref() .expect("function must be compiled before it can be finalized"); func.perform_relocations(|name| self.get_definition(name)); } for data in std::mem::take(&mut self.data_objects_to_finalize) { let decl = self.declarations.get_data_decl(data); debug_assert!(decl.linkage.is_definable()); let data = self.data_objects[data] .as_ref() .expect("data object must be compiled before it can be finalized"); data.perform_relocations(|name| self.get_definition(name)); } // Now that we're done patching, prepare the memory for execution! self.memory.readonly.set_readonly(); self.memory.code.set_readable_and_executable(); } /// Create a new `SimpleJITModule`. pub fn new(builder: SimpleJITBuilder) -> Self { let memory = MemoryHandle { code: Memory::new(), readonly: Memory::new(), writable: Memory::new(), }; Self { isa: builder.isa, symbols: builder.symbols, libcall_names: builder.libcall_names, memory, declarations: ModuleDeclarations::default(), functions: SecondaryMap::new(), data_objects: SecondaryMap::new(), functions_to_finalize: Vec::new(), data_objects_to_finalize: Vec::new(), } } } impl<'simple_jit_backend> Module for SimpleJITModule { fn isa(&self) -> &dyn TargetIsa { &*self.isa } fn declarations(&self) -> &ModuleDeclarations { &self.declarations } fn declare_function( &mut self, name: &str, linkage: Linkage, signature: &ir::Signature, ) -> ModuleResult { let (id, _decl) = self .declarations .declare_function(name, linkage, signature)?; Ok(id) } fn declare_data( &mut self, name: &str, linkage: Linkage, writable: bool, tls: bool, ) -> ModuleResult { assert!(!tls, "SimpleJIT doesn't yet support TLS"); let (id, _decl) = self .declarations .declare_data(name, linkage, writable, tls)?; Ok(id) } fn define_function( &mut self, id: FuncId, ctx: &mut cranelift_codegen::Context, trap_sink: &mut TS, ) -> ModuleResult where TS: TrapSink, { info!("defining function {}: {}", id, ctx.func.display(self.isa())); let CodeInfo { total_size: code_size, .. } = ctx.compile(self.isa())?; let decl = self.declarations.get_function_decl(id); if !decl.linkage.is_definable() { return Err(ModuleError::InvalidImportDefinition(decl.name.clone())); } if !self.functions[id].is_none() { return Err(ModuleError::DuplicateDefinition(decl.name.to_owned())); } let size = code_size as usize; let ptr = self .memory .code .allocate(size, EXECUTABLE_DATA_ALIGNMENT) .expect("TODO: handle OOM etc."); let mut reloc_sink = SimpleJITRelocSink::default(); let mut stack_map_sink = binemit::NullStackMapSink {}; unsafe { ctx.emit_to_memory( &*self.isa, ptr, &mut reloc_sink, trap_sink, &mut stack_map_sink, ) }; self.record_function_for_perf(ptr, size, &decl.name); self.functions[id] = Some(CompiledBlob { ptr, size, relocs: reloc_sink.relocs, }); self.functions_to_finalize.push(id); Ok(ModuleCompiledFunction { size: code_size }) } fn define_function_bytes( &mut self, id: FuncId, bytes: &[u8], relocs: &[RelocRecord], ) -> ModuleResult { info!("defining function {} with bytes", id); let total_size: u32 = match bytes.len().try_into() { Ok(total_size) => total_size, _ => Err(CodegenError::CodeTooLarge)?, }; let decl = self.declarations.get_function_decl(id); if !decl.linkage.is_definable() { return Err(ModuleError::InvalidImportDefinition(decl.name.clone())); } if !self.functions[id].is_none() { return Err(ModuleError::DuplicateDefinition(decl.name.to_owned())); } let size = bytes.len(); let ptr = self .memory .code .allocate(size, EXECUTABLE_DATA_ALIGNMENT) .expect("TODO: handle OOM etc."); unsafe { ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, size); } self.record_function_for_perf(ptr, size, &decl.name); self.functions[id] = Some(CompiledBlob { ptr, size, relocs: relocs.to_vec(), }); self.functions_to_finalize.push(id); Ok(ModuleCompiledFunction { size: total_size }) } fn define_data(&mut self, id: DataId, data: &DataContext) -> ModuleResult<()> { let decl = self.declarations.get_data_decl(id); if !decl.linkage.is_definable() { return Err(ModuleError::InvalidImportDefinition(decl.name.clone())); } if !self.data_objects[id].is_none() { return Err(ModuleError::DuplicateDefinition(decl.name.to_owned())); } assert!(!decl.tls, "SimpleJIT doesn't yet support TLS"); self.data_objects_to_finalize.push(id); let &DataDescription { ref init, ref function_decls, ref data_decls, ref function_relocs, ref data_relocs, custom_segment_section: _, align, } = data.description(); let size = init.size(); let ptr = if decl.writable { self.memory .writable .allocate(size, align.unwrap_or(WRITABLE_DATA_ALIGNMENT)) .expect("TODO: handle OOM etc.") } else { self.memory .readonly .allocate(size, align.unwrap_or(READONLY_DATA_ALIGNMENT)) .expect("TODO: handle OOM etc.") }; match *init { Init::Uninitialized => { panic!("data is not initialized yet"); } Init::Zeros { .. } => { unsafe { ptr::write_bytes(ptr, 0, size) }; } Init::Bytes { ref contents } => { let src = contents.as_ptr(); unsafe { ptr::copy_nonoverlapping(src, ptr, size) }; } } let reloc = match self.isa.triple().pointer_width().unwrap() { PointerWidth::U16 => panic!(), PointerWidth::U32 => Reloc::Abs4, PointerWidth::U64 => Reloc::Abs8, }; let mut relocs = Vec::new(); for &(offset, id) in function_relocs { relocs.push(RelocRecord { reloc, offset, name: function_decls[id].clone(), addend: 0, }); } for &(offset, id, addend) in data_relocs { relocs.push(RelocRecord { reloc, offset, name: data_decls[id].clone(), addend, }); } self.data_objects[id] = Some(CompiledBlob { ptr, size, relocs }); Ok(()) } } #[cfg(not(windows))] fn lookup_with_dlsym(name: &str) -> Option<*const u8> { let c_str = CString::new(name).unwrap(); let c_str_ptr = c_str.as_ptr(); let sym = unsafe { libc::dlsym(libc::RTLD_DEFAULT, c_str_ptr) }; if sym.is_null() { None } else { Some(sym as *const u8) } } #[cfg(windows)] fn lookup_with_dlsym(name: &str) -> Option<*const u8> { const MSVCRT_DLL: &[u8] = b"msvcrt.dll\0"; let c_str = CString::new(name).unwrap(); let c_str_ptr = c_str.as_ptr(); unsafe { let handles = [ // try to find the searched symbol in the currently running executable ptr::null_mut(), // try to find the searched symbol in local c runtime winapi::um::libloaderapi::GetModuleHandleA(MSVCRT_DLL.as_ptr() as *const i8), ]; for handle in &handles { let addr = winapi::um::libloaderapi::GetProcAddress(*handle, c_str_ptr); if addr.is_null() { continue; } return Some(addr as *const u8); } None } } #[derive(Default)] struct SimpleJITRelocSink { relocs: Vec, } impl RelocSink for SimpleJITRelocSink { fn reloc_block(&mut self, _offset: CodeOffset, _reloc: Reloc, _block_offset: CodeOffset) { unimplemented!(); } fn reloc_external( &mut self, offset: CodeOffset, _srcloc: ir::SourceLoc, reloc: Reloc, name: &ir::ExternalName, addend: Addend, ) { self.relocs.push(RelocRecord { offset, reloc, name: name.clone(), addend, }); } fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) { match reloc { Reloc::X86PCRelRodata4 => { // Not necessary to record this unless we are going to split apart code and its // jumptbl/rodata. } _ => { panic!("Unhandled reloc"); } } } fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _constant: ir::ConstantOffset) { match reloc { Reloc::X86PCRelRodata4 => { // Not necessary to record this unless we are going to split apart code and its // jumptbl/rodata. } _ => { panic!("Unhandled reloc"); } } } }