//! Support for jitdump files which can be used by perf for profiling jitted code. //! Spec definitions for the output format is as described here: //! https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt //! //! Usage Example: //! Record //! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --jitdump test.wasm //! Combine //! sudo perf inject -v -j -i perf.data -o perf.jit.data //! Report //! sudo perf report -i perf.jit.data -F+period,srcline //! Note: For descriptive results, the WASM file being executed should contain dwarf debug data use crate::ProfilingAgent; use anyhow::Result; use object::{Object, ObjectSection}; use scroll::{IOwrite, SizeWith, NATIVE}; use serde::{Deserialize, Serialize}; use std::fmt::Debug; use std::fs::{File, OpenOptions}; use std::io; use std::io::Write; use std::os::unix::prelude::*; use std::ptr; use std::sync::Mutex; use std::{borrow, mem, process}; use target_lexicon::Architecture; use wasmtime_environ::entity::PrimaryMap; use wasmtime_environ::wasm::DefinedFuncIndex; use wasmtime_environ::Module; use wasmtime_runtime::VMFunctionBody; use object::elf; /// Defines jitdump record types #[repr(u32)] pub enum RecordId { /// Value 0: JIT_CODE_LOAD: record describing a jitted function JitCodeLoad = 0, /// Value 1: JIT_CODE_MOVE: record describing an already jitted function which is moved _JitCodeMove = 1, /// Value 2: JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function JitCodeDebugInfo = 2, /// Value 3: JIT_CODE_CLOSE: record marking the end of the jit runtime (optional) _JitCodeClose = 3, /// Value 4: JIT_CODE_UNWINDING_INFO: record describing a function unwinding information _JitCodeUnwindingInfo = 4, } /// Each record starts with this fixed size record header which describes the record that follows #[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, IOwrite, SizeWith)] #[repr(C)] pub struct RecordHeader { /// uint32_t id: a value identifying the record type (see below) id: u32, /// uint32_t total_size: the size in bytes of the record including the header. record_size: u32, /// uint64_t timestamp: a timestamp of when the record was created. timestamp: u64, } /// The CodeLoadRecord is used for describing jitted functions #[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, IOwrite, SizeWith)] #[repr(C)] pub struct CodeLoadRecord { /// Fixed sized header that describes this record header: RecordHeader, /// uint32_t pid: OS process id of the runtime generating the jitted code pid: u32, /// uint32_t tid: OS thread identification of the runtime thread generating the jitted code tid: u32, /// uint64_t vma: virtual address of jitted code start virtual_address: u64, /// uint64_t code_addr: code start address for the jitted code. By default vma = code_addr address: u64, /// uint64_t code_size: size in bytes of the generated jitted code size: u64, /// uint64_t code_index: unique identifier for the jitted code (see below) index: u64, } /// Describes source line information for a jitted function #[derive(Serialize, Deserialize, Debug, Default)] #[repr(C)] pub struct DebugEntry { /// uint64_t code_addr: address of function for which the debug information is generated address: u64, /// uint32_t line: source file line number (starting at 1) line: u32, /// uint32_t discrim: column discriminator, 0 is default discriminator: u32, /// char name[n]: source file name in ASCII, including null termination filename: String, } /// Describes debug information for a jitted function. An array of debug entries are /// appended to this record during writting. Note, this record must preceed the code /// load record that describes the same jitted function. #[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, IOwrite, SizeWith)] #[repr(C)] pub struct DebugInfoRecord { /// Fixed sized header that describes this record header: RecordHeader, /// uint64_t code_addr: address of function for which the debug information is generated address: u64, /// uint64_t nr_entry: number of debug entries for the function appended to this record count: u64, } /// Fixed-sized header for each jitdump file #[derive(Serialize, Deserialize, Debug, Default, IOwrite, SizeWith)] #[repr(C)] pub struct FileHeader { /// uint32_t magic: a magic number tagging the file type. The value is 4-byte long and represents the /// string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can /// be used to detect the endianness of the file magic: u32, /// uint32_t version: a 4-byte value representing the format version. It is currently set to 2 version: u32, /// uint32_t total_size: size in bytes of file header size: u32, /// uint32_t elf_mach: ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h) e_machine: u32, /// uint32_t pad1: padding. Reserved for future use pad1: u32, /// uint32_t pid: JIT runtime process identification (OS specific) pid: u32, /// uint64_t timestamp: timestamp of when the file was created timestamp: u64, /// uint64_t flags: a bitmask of flags flags: u64, } /// Interface for driving the creation of jitdump files pub struct JitDumpAgent { // Note that we use a mutex internally to serialize writing out to our // `jitdump_file` within this process, since multiple threads may be sharing // this jit agent. state: Mutex, } struct State { /// File instance for the jit dump file jitdump_file: File, map_addr: usize, /// Unique identifier for jitted code code_index: u64, /// Flag for experimenting with dumping code load record /// after each function (true) or after each module. This /// flag is currently set to true. dump_funcs: bool, } impl JitDumpAgent { /// Intialize a JitDumpAgent and write out the header pub fn new() -> Result { let filename = format!("./jit-{}.dump", process::id()); let jitdump_file = OpenOptions::new() .read(true) .write(true) .create(true) .truncate(true) .open(&filename)?; // After we make our `*.dump` file we execute an `mmap` syscall, // specifically with executable permissions, to map it into our address // space. This is required so `perf inject` will work later. The `perf // inject` command will see that an mmap syscall happened, and it'll see // the filename we mapped, and that'll trigger it to actually read and // parse the file. // // To match what some perf examples are doing we keep this `mmap` alive // until this agent goes away. let map_addr = unsafe { let ptr = libc::mmap( ptr::null_mut(), libc::sysconf(libc::_SC_PAGESIZE) as usize, libc::PROT_EXEC | libc::PROT_READ, libc::MAP_PRIVATE, jitdump_file.as_raw_fd(), 0, ); if ptr == libc::MAP_FAILED { return Err(io::Error::last_os_error().into()); } ptr as usize }; let mut state = State { jitdump_file, map_addr, code_index: 0, dump_funcs: true, }; state.write_file_header()?; Ok(JitDumpAgent { state: Mutex::new(state), }) } } impl ProfilingAgent for JitDumpAgent { fn module_load( &self, module: &Module, functions: &PrimaryMap, dbg_image: Option<&[u8]>, ) { self.state .lock() .unwrap() .module_load(module, functions, dbg_image); } } impl State { /// Returns timestamp from a single source fn get_time_stamp(&self) -> u64 { // We need to use `CLOCK_MONOTONIC` on Linux which is what `Instant` // conveniently also uses, but `Instant` doesn't allow us to get access // to nanoseconds as an internal detail, so we calculate the nanoseconds // ourselves here. unsafe { let mut ts = mem::MaybeUninit::zeroed(); assert_eq!( libc::clock_gettime(libc::CLOCK_MONOTONIC, ts.as_mut_ptr()), 0 ); let ts = ts.assume_init(); // TODO: What does it mean for either sec or nsec to be negative? (ts.tv_sec * 1_000_000_000 + ts.tv_nsec) as u64 } } /// Returns the ELF machine architecture. fn get_e_machine(&self) -> u32 { match target_lexicon::HOST.architecture { Architecture::X86_64 => elf::EM_X86_64 as u32, Architecture::X86_32(_) => elf::EM_386 as u32, Architecture::Arm(_) => elf::EM_ARM as u32, Architecture::Aarch64(_) => elf::EM_AARCH64 as u32, _ => unimplemented!("unrecognized architecture"), } } fn write_file_header(&mut self) -> Result<()> { let header = FileHeader { timestamp: self.get_time_stamp(), e_machine: self.get_e_machine(), magic: 0x4A695444, version: 1, size: mem::size_of::() as u32, pad1: 0, pid: process::id(), flags: 0, }; self.jitdump_file.iowrite_with(header, NATIVE)?; Ok(()) } fn write_code_load_record( &mut self, record_name: &str, cl_record: CodeLoadRecord, code_buffer: &[u8], ) -> Result<()> { self.jitdump_file.iowrite_with(cl_record, NATIVE)?; self.jitdump_file.write_all(record_name.as_bytes())?; self.jitdump_file.write_all(b"\0")?; self.jitdump_file.write_all(code_buffer)?; Ok(()) } /// Write DebugInfoRecord to open jit dump file. /// Must be written before the corresponding CodeLoadRecord. fn write_debug_info_record(&mut self, dir_record: DebugInfoRecord) -> Result<()> { self.jitdump_file.iowrite_with(dir_record, NATIVE)?; Ok(()) } /// Write DebugInfoRecord to open jit dump file. /// Must be written before the corresponding CodeLoadRecord. fn write_debug_info_entries(&mut self, die_entries: Vec) -> Result<()> { for entry in die_entries.iter() { self.jitdump_file.iowrite_with(entry.address, NATIVE)?; self.jitdump_file.iowrite_with(entry.line, NATIVE)?; self.jitdump_file .iowrite_with(entry.discriminator, NATIVE)?; self.jitdump_file.write_all(entry.filename.as_bytes())?; self.jitdump_file.write_all(b"\0")?; } Ok(()) } /// Sent when a method is compiled and loaded into memory by the VM. pub fn module_load( &mut self, module: &Module, functions: &PrimaryMap, dbg_image: Option<&[u8]>, ) -> () { let pid = process::id(); let tid = pid; // ThreadId does appear to track underlying thread. Using PID. for (idx, func) in functions.iter() { let (addr, len) = unsafe { ((**func).as_ptr() as *const u8, (**func).len()) }; if let Some(img) = &dbg_image { if let Err(err) = self.dump_from_debug_image(img, "wasm", addr, len, pid, tid) { println!( "Jitdump: module_load failed dumping from debug image: {:?}\n", err ); } } else { let timestamp = self.get_time_stamp(); let name = super::debug_name(module, idx); self.dump_code_load_record(&name, addr, len, timestamp, pid, tid); } } } fn dump_code_load_record( &mut self, method_name: &str, addr: *const u8, len: usize, timestamp: u64, pid: u32, tid: u32, ) -> () { let name_len = method_name.len() + 1; let size_limit = mem::size_of::(); let rh = RecordHeader { id: RecordId::JitCodeLoad as u32, record_size: size_limit as u32 + name_len as u32 + len as u32, timestamp: timestamp, }; let clr = CodeLoadRecord { header: rh, pid: pid, tid: tid, virtual_address: addr as u64, address: addr as u64, size: len as u64, index: self.code_index, }; self.code_index += 1; unsafe { let code_buffer: &[u8] = std::slice::from_raw_parts(addr, len); if let Err(err) = self.write_code_load_record(method_name, clr, code_buffer) { println!("Jitdump: write_code_load_failed_record failed: {:?}\n", err); } } } /// Attempts to dump debuginfo data structures, adding method and line level /// for the jitted function. pub fn dump_from_debug_image( &mut self, dbg_image: &[u8], module_name: &str, addr: *const u8, len: usize, pid: u32, tid: u32, ) -> Result<()> { let file = object::File::parse(&dbg_image).unwrap(); let endian = if file.is_little_endian() { gimli::RunTimeEndian::Little } else { gimli::RunTimeEndian::Big }; let load_section = |id: gimli::SectionId| -> Result> { if let Some(section) = file.section_by_name(id.name()) { Ok(section.data()?.into()) } else { Ok((&[] as &[u8]).into()) } }; let load_section_sup = |_| Ok(borrow::Cow::Borrowed(&[][..])); let dwarf_cow = gimli::Dwarf::load(&load_section, &load_section_sup)?; let borrow_section: &dyn for<'a> Fn( &'a borrow::Cow<[u8]>, ) -> gimli::EndianSlice<'a, gimli::RunTimeEndian> = &|section| gimli::EndianSlice::new(&*section, endian); let dwarf = dwarf_cow.borrow(&borrow_section); let mut iter = dwarf.units(); while let Some(header) = iter.next()? { let unit = match dwarf.unit(header) { Ok(unit) => unit, Err(_err) => { return Ok(()); } }; self.dump_entries(unit, &dwarf, module_name, addr, len, pid, tid)?; // TODO: Temp exit to avoid duplicate addresses being covered by only // processing the top unit break; } if !self.dump_funcs { let timestamp = self.get_time_stamp(); self.dump_code_load_record(module_name, addr, len, timestamp, pid, tid); } Ok(()) } fn dump_entries( &mut self, unit: gimli::Unit, dwarf: &gimli::Dwarf, module_name: &str, addr: *const u8, len: usize, pid: u32, tid: u32, ) -> Result<()> { let mut depth = 0; let mut entries = unit.entries(); while let Some((delta_depth, entry)) = entries.next_dfs()? { if self.dump_funcs { let record_header = RecordHeader { id: RecordId::JitCodeLoad as u32, record_size: 0, timestamp: 0, }; let mut clr = CodeLoadRecord { header: record_header, pid: pid, tid: tid, virtual_address: 0, address: 0, size: 0, index: 0, }; let mut clr_name: String = String::from(module_name); let mut get_debug_entry = false; depth += delta_depth; assert!(depth >= 0); if entry.tag() == gimli::constants::DW_TAG_subprogram { get_debug_entry = true; let mut attrs = entry.attrs(); while let Some(attr) = attrs.next()? { if let Some(n) = attr.name().static_string() { if n == "DW_AT_low_pc" { clr.address = match attr.value() { gimli::AttributeValue::Addr(address) => address, _ => 0, }; clr.virtual_address = clr.address; } else if n == "DW_AT_high_pc" { clr.size = match attr.value() { gimli::AttributeValue::Udata(data) => data, _ => 0, }; } else if n == "DW_AT_name" { clr_name = match attr.value() { gimli::AttributeValue::DebugStrRef(offset) => { if let Ok(s) = dwarf.debug_str.get_str(offset) { clr_name.push_str("::"); clr_name.push_str(&s.to_string_lossy()?); clr_name } else { clr_name.push_str("::"); clr_name.push_str("?"); clr_name } } _ => { clr_name.push_str("??"); clr_name } }; } } } } if get_debug_entry { // TODO: Temp check to make sure well only formed data is processed. if clr.address == 0 { continue; } // TODO: Temp check to make sure well only formed data is processed. if clr_name == "?" { continue; } if clr.address == 0 || clr.size == 0 { clr.address = addr as u64; clr.virtual_address = addr as u64; clr.size = len as u64; } clr.header.record_size = mem::size_of::() as u32 + (clr_name.len() + 1) as u32 + clr.size as u32; clr.index = self.code_index; self.code_index += 1; self.dump_debug_info(&unit, &dwarf, clr.address, clr.size, None)?; clr.header.timestamp = self.get_time_stamp(); unsafe { let code_buffer: &[u8] = std::slice::from_raw_parts(clr.address as *const u8, clr.size as usize); let _ = self.write_code_load_record(&clr_name, clr, code_buffer); } } } else { let mut func_name: String = String::from("?"); let mut func_addr = 0; let mut func_size = 0; let mut get_debug_entry = false; depth += delta_depth; assert!(depth >= 0); if entry.tag() == gimli::constants::DW_TAG_subprogram { get_debug_entry = true; let mut attrs = entry.attrs(); while let Some(attr) = attrs.next()? { if let Some(n) = attr.name().static_string() { if n == "DW_AT_low_pc" { func_addr = match attr.value() { gimli::AttributeValue::Addr(address) => address, _ => 0, }; } else if n == "DW_AT_high_pc" { func_size = match attr.value() { gimli::AttributeValue::Udata(data) => data, _ => 0, }; } else if n == "DW_AT_name" { func_name = match attr.value() { gimli::AttributeValue::DebugStrRef(offset) => { if let Ok(s) = dwarf.debug_str.get_str(offset) { func_name.clear(); func_name.push_str(&s.to_string_lossy()?); func_name } else { func_name.push_str("?"); func_name } } _ => { func_name.push_str("??"); func_name } }; } } } } if get_debug_entry { // TODO: Temp check to make sure well only formed data is processed. if func_addr == 0 { continue; } // TODO: Temp check to make sure well only formed data is processed. if func_name == "?" { continue; } self.dump_debug_info( &unit, &dwarf, func_addr, func_size, Some(func_name.as_str()), )?; } } } Ok(()) } fn dump_debug_info( &mut self, unit: &gimli::Unit, dwarf: &gimli::Dwarf, address: u64, size: u64, file_suffix: Option<&str>, ) -> Result<()> { let timestamp = self.get_time_stamp(); if let Some(program) = unit.line_program.clone() { let mut debug_info_record = DebugInfoRecord { header: RecordHeader { id: RecordId::JitCodeDebugInfo as u32, record_size: 0, timestamp: timestamp, }, address: address, count: 0, }; let mut debug_entries = Vec::new(); let mut debug_entries_total_filenames_len = 0; let mut rows = program.rows(); while let Some((header, row)) = rows.next_row()? { let row_file_index = row.file_index() - 1; let myfile = dwarf .attr_string( &unit, header.file_names()[row_file_index as usize].path_name(), ) .unwrap(); let filename = myfile.to_string_lossy()?; let line = row.line().unwrap_or(0); let column = match row.column() { gimli::ColumnType::Column(column) => column, gimli::ColumnType::LeftEdge => 0, }; if (row.address() < address) || (row.address() > (address + size)) { continue; } let mut debug_entry = DebugEntry { address: row.address(), line: line as u32, discriminator: column as u32, filename: filename.to_string(), }; if let Some(suffix) = file_suffix { debug_entry.filename.push_str("::"); debug_entry.filename.push_str(suffix); } debug_entries_total_filenames_len += debug_entry.filename.len() + 1; debug_entries.push(debug_entry); } debug_info_record.count = debug_entries.len() as u64; let debug_entries_size = (debug_info_record.count * (mem::size_of::() as u64 - mem::size_of::() as u64)) + debug_entries_total_filenames_len as u64; debug_info_record.header.record_size = mem::size_of::() as u32 + debug_entries_size as u32; let _ = self.write_debug_info_record(debug_info_record); let _ = self.write_debug_info_entries(debug_entries); } Ok(()) } } impl Drop for State { fn drop(&mut self) { unsafe { libc::munmap( self.map_addr as *mut _, libc::sysconf(libc::_SC_PAGESIZE) as usize, ); } } } trait Reader: gimli::Reader + Send + Sync {} impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where Endian: gimli::Endianity + Send + Sync { }