Files
wasmtime/crates/profiling/src/jitdump_linux.rs
Ulrich Weigand fe10cc9d52 Fix jitdump header magic field on big-endian platforms (#2511)
The jitdump header contains a "magic" field that is defined to hold
the value 0x4A695444 as u32 in native endianness.  (This allows
consumers of the file to detect the endianness of the platform
where the file was written, and apply it when reading other fields.)

However, current code always writes 0x4A695444 in little-endian
byte order, even on big-endian system.  This makes consumers fail
when attempting to read files written on big-endian platforms.

Fixed by always writing the magic in native endianness.
2020-12-15 08:44:43 -06:00

676 lines
25 KiB
Rust

//! Support for jitdump files which can be used by perf for profiling jitted code.
//! Spec definitions for the output format is as described here:
//! https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
//!
//! Usage Example:
//! Record
//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --jitdump test.wasm
//! Combine
//! sudo perf inject -v -j -i perf.data -o perf.jit.data
//! Report
//! sudo perf report -i perf.jit.data -F+period,srcline
//! Note: For descriptive results, the WASM file being executed should contain dwarf debug data
use crate::ProfilingAgent;
use anyhow::Result;
use object::{Object, ObjectSection};
use scroll::{IOwrite, SizeWith, NATIVE};
use serde::{Deserialize, Serialize};
use std::fmt::Debug;
use std::fs::{File, OpenOptions};
use std::io;
use std::io::Write;
use std::os::unix::prelude::*;
use std::ptr;
use std::sync::Mutex;
use std::{borrow, mem, process};
use target_lexicon::Architecture;
use wasmtime_environ::entity::PrimaryMap;
use wasmtime_environ::wasm::DefinedFuncIndex;
use wasmtime_environ::Module;
use wasmtime_runtime::VMFunctionBody;
use object::elf;
/// Defines jitdump record types
#[repr(u32)]
pub enum RecordId {
/// Value 0: JIT_CODE_LOAD: record describing a jitted function
JitCodeLoad = 0,
/// Value 1: JIT_CODE_MOVE: record describing an already jitted function which is moved
_JitCodeMove = 1,
/// Value 2: JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function
JitCodeDebugInfo = 2,
/// Value 3: JIT_CODE_CLOSE: record marking the end of the jit runtime (optional)
_JitCodeClose = 3,
/// Value 4: JIT_CODE_UNWINDING_INFO: record describing a function unwinding information
_JitCodeUnwindingInfo = 4,
}
/// Each record starts with this fixed size record header which describes the record that follows
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, IOwrite, SizeWith)]
#[repr(C)]
pub struct RecordHeader {
/// uint32_t id: a value identifying the record type (see below)
id: u32,
/// uint32_t total_size: the size in bytes of the record including the header.
record_size: u32,
/// uint64_t timestamp: a timestamp of when the record was created.
timestamp: u64,
}
/// The CodeLoadRecord is used for describing jitted functions
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, IOwrite, SizeWith)]
#[repr(C)]
pub struct CodeLoadRecord {
/// Fixed sized header that describes this record
header: RecordHeader,
/// uint32_t pid: OS process id of the runtime generating the jitted code
pid: u32,
/// uint32_t tid: OS thread identification of the runtime thread generating the jitted code
tid: u32,
/// uint64_t vma: virtual address of jitted code start
virtual_address: u64,
/// uint64_t code_addr: code start address for the jitted code. By default vma = code_addr
address: u64,
/// uint64_t code_size: size in bytes of the generated jitted code
size: u64,
/// uint64_t code_index: unique identifier for the jitted code (see below)
index: u64,
}
/// Describes source line information for a jitted function
#[derive(Serialize, Deserialize, Debug, Default)]
#[repr(C)]
pub struct DebugEntry {
/// uint64_t code_addr: address of function for which the debug information is generated
address: u64,
/// uint32_t line: source file line number (starting at 1)
line: u32,
/// uint32_t discrim: column discriminator, 0 is default
discriminator: u32,
/// char name[n]: source file name in ASCII, including null termination
filename: String,
}
/// Describes debug information for a jitted function. An array of debug entries are
/// appended to this record during writting. Note, this record must preceed the code
/// load record that describes the same jitted function.
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, IOwrite, SizeWith)]
#[repr(C)]
pub struct DebugInfoRecord {
/// Fixed sized header that describes this record
header: RecordHeader,
/// uint64_t code_addr: address of function for which the debug information is generated
address: u64,
/// uint64_t nr_entry: number of debug entries for the function appended to this record
count: u64,
}
/// Fixed-sized header for each jitdump file
#[derive(Serialize, Deserialize, Debug, Default, IOwrite, SizeWith)]
#[repr(C)]
pub struct FileHeader {
/// uint32_t magic: a magic number tagging the file type. The value is 4-byte long and represents the
/// string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can
/// be used to detect the endianness of the file
magic: u32,
/// uint32_t version: a 4-byte value representing the format version. It is currently set to 2
version: u32,
/// uint32_t total_size: size in bytes of file header
size: u32,
/// uint32_t elf_mach: ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
e_machine: u32,
/// uint32_t pad1: padding. Reserved for future use
pad1: u32,
/// uint32_t pid: JIT runtime process identification (OS specific)
pid: u32,
/// uint64_t timestamp: timestamp of when the file was created
timestamp: u64,
/// uint64_t flags: a bitmask of flags
flags: u64,
}
/// Interface for driving the creation of jitdump files
pub struct JitDumpAgent {
// Note that we use a mutex internally to serialize writing out to our
// `jitdump_file` within this process, since multiple threads may be sharing
// this jit agent.
state: Mutex<State>,
}
struct State {
/// File instance for the jit dump file
jitdump_file: File,
map_addr: usize,
/// Unique identifier for jitted code
code_index: u64,
/// Flag for experimenting with dumping code load record
/// after each function (true) or after each module. This
/// flag is currently set to true.
dump_funcs: bool,
}
impl JitDumpAgent {
/// Intialize a JitDumpAgent and write out the header
pub fn new() -> Result<Self> {
let filename = format!("./jit-{}.dump", process::id());
let jitdump_file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.truncate(true)
.open(&filename)?;
// After we make our `*.dump` file we execute an `mmap` syscall,
// specifically with executable permissions, to map it into our address
// space. This is required so `perf inject` will work later. The `perf
// inject` command will see that an mmap syscall happened, and it'll see
// the filename we mapped, and that'll trigger it to actually read and
// parse the file.
//
// To match what some perf examples are doing we keep this `mmap` alive
// until this agent goes away.
let map_addr = unsafe {
let ptr = libc::mmap(
ptr::null_mut(),
libc::sysconf(libc::_SC_PAGESIZE) as usize,
libc::PROT_EXEC | libc::PROT_READ,
libc::MAP_PRIVATE,
jitdump_file.as_raw_fd(),
0,
);
if ptr == libc::MAP_FAILED {
return Err(io::Error::last_os_error().into());
}
ptr as usize
};
let mut state = State {
jitdump_file,
map_addr,
code_index: 0,
dump_funcs: true,
};
state.write_file_header()?;
Ok(JitDumpAgent {
state: Mutex::new(state),
})
}
}
impl ProfilingAgent for JitDumpAgent {
fn module_load(
&self,
module: &Module,
functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
dbg_image: Option<&[u8]>,
) {
self.state
.lock()
.unwrap()
.module_load(module, functions, dbg_image);
}
}
impl State {
/// Returns timestamp from a single source
fn get_time_stamp(&self) -> u64 {
// We need to use `CLOCK_MONOTONIC` on Linux which is what `Instant`
// conveniently also uses, but `Instant` doesn't allow us to get access
// to nanoseconds as an internal detail, so we calculate the nanoseconds
// ourselves here.
unsafe {
let mut ts = mem::MaybeUninit::zeroed();
assert_eq!(
libc::clock_gettime(libc::CLOCK_MONOTONIC, ts.as_mut_ptr()),
0
);
let ts = ts.assume_init();
// TODO: What does it mean for either sec or nsec to be negative?
(ts.tv_sec * 1_000_000_000 + ts.tv_nsec) as u64
}
}
/// Returns the ELF machine architecture.
fn get_e_machine(&self) -> u32 {
match target_lexicon::HOST.architecture {
Architecture::X86_64 => elf::EM_X86_64 as u32,
Architecture::X86_32(_) => elf::EM_386 as u32,
Architecture::Arm(_) => elf::EM_ARM as u32,
Architecture::Aarch64(_) => elf::EM_AARCH64 as u32,
_ => unimplemented!("unrecognized architecture"),
}
}
fn write_file_header(&mut self) -> Result<()> {
let header = FileHeader {
timestamp: self.get_time_stamp(),
e_machine: self.get_e_machine(),
magic: 0x4A695444,
version: 1,
size: mem::size_of::<FileHeader>() as u32,
pad1: 0,
pid: process::id(),
flags: 0,
};
self.jitdump_file.iowrite_with(header, NATIVE)?;
Ok(())
}
fn write_code_load_record(
&mut self,
record_name: &str,
cl_record: CodeLoadRecord,
code_buffer: &[u8],
) -> Result<()> {
self.jitdump_file.iowrite_with(cl_record, NATIVE)?;
self.jitdump_file.write_all(record_name.as_bytes())?;
self.jitdump_file.write_all(b"\0")?;
self.jitdump_file.write_all(code_buffer)?;
Ok(())
}
/// Write DebugInfoRecord to open jit dump file.
/// Must be written before the corresponding CodeLoadRecord.
fn write_debug_info_record(&mut self, dir_record: DebugInfoRecord) -> Result<()> {
self.jitdump_file.iowrite_with(dir_record, NATIVE)?;
Ok(())
}
/// Write DebugInfoRecord to open jit dump file.
/// Must be written before the corresponding CodeLoadRecord.
fn write_debug_info_entries(&mut self, die_entries: Vec<DebugEntry>) -> Result<()> {
for entry in die_entries.iter() {
self.jitdump_file.iowrite_with(entry.address, NATIVE)?;
self.jitdump_file.iowrite_with(entry.line, NATIVE)?;
self.jitdump_file
.iowrite_with(entry.discriminator, NATIVE)?;
self.jitdump_file.write_all(entry.filename.as_bytes())?;
self.jitdump_file.write_all(b"\0")?;
}
Ok(())
}
/// Sent when a method is compiled and loaded into memory by the VM.
pub fn module_load(
&mut self,
module: &Module,
functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
dbg_image: Option<&[u8]>,
) -> () {
let pid = process::id();
let tid = pid; // ThreadId does appear to track underlying thread. Using PID.
for (idx, func) in functions.iter() {
let (addr, len) = unsafe { ((**func).as_ptr() as *const u8, (**func).len()) };
if let Some(img) = &dbg_image {
if let Err(err) = self.dump_from_debug_image(img, "wasm", addr, len, pid, tid) {
println!(
"Jitdump: module_load failed dumping from debug image: {:?}\n",
err
);
}
} else {
let timestamp = self.get_time_stamp();
let name = super::debug_name(module, idx);
self.dump_code_load_record(&name, addr, len, timestamp, pid, tid);
}
}
}
fn dump_code_load_record(
&mut self,
method_name: &str,
addr: *const u8,
len: usize,
timestamp: u64,
pid: u32,
tid: u32,
) -> () {
let name_len = method_name.len() + 1;
let size_limit = mem::size_of::<CodeLoadRecord>();
let rh = RecordHeader {
id: RecordId::JitCodeLoad as u32,
record_size: size_limit as u32 + name_len as u32 + len as u32,
timestamp: timestamp,
};
let clr = CodeLoadRecord {
header: rh,
pid: pid,
tid: tid,
virtual_address: addr as u64,
address: addr as u64,
size: len as u64,
index: self.code_index,
};
self.code_index += 1;
unsafe {
let code_buffer: &[u8] = std::slice::from_raw_parts(addr, len);
if let Err(err) = self.write_code_load_record(method_name, clr, code_buffer) {
println!("Jitdump: write_code_load_failed_record failed: {:?}\n", err);
}
}
}
/// Attempts to dump debuginfo data structures, adding method and line level
/// for the jitted function.
pub fn dump_from_debug_image(
&mut self,
dbg_image: &[u8],
module_name: &str,
addr: *const u8,
len: usize,
pid: u32,
tid: u32,
) -> Result<()> {
let file = object::File::parse(&dbg_image).unwrap();
let endian = if file.is_little_endian() {
gimli::RunTimeEndian::Little
} else {
gimli::RunTimeEndian::Big
};
let load_section = |id: gimli::SectionId| -> Result<borrow::Cow<[u8]>> {
if let Some(section) = file.section_by_name(id.name()) {
Ok(section.data()?.into())
} else {
Ok((&[] as &[u8]).into())
}
};
let load_section_sup = |_| Ok(borrow::Cow::Borrowed(&[][..]));
let dwarf_cow = gimli::Dwarf::load(&load_section, &load_section_sup)?;
let borrow_section: &dyn for<'a> Fn(
&'a borrow::Cow<[u8]>,
)
-> gimli::EndianSlice<'a, gimli::RunTimeEndian> =
&|section| gimli::EndianSlice::new(&*section, endian);
let dwarf = dwarf_cow.borrow(&borrow_section);
let mut iter = dwarf.units();
while let Some(header) = iter.next()? {
let unit = match dwarf.unit(header) {
Ok(unit) => unit,
Err(_err) => {
return Ok(());
}
};
self.dump_entries(unit, &dwarf, module_name, addr, len, pid, tid)?;
// TODO: Temp exit to avoid duplicate addresses being covered by only
// processing the top unit
break;
}
if !self.dump_funcs {
let timestamp = self.get_time_stamp();
self.dump_code_load_record(module_name, addr, len, timestamp, pid, tid);
}
Ok(())
}
fn dump_entries<R: Reader>(
&mut self,
unit: gimli::Unit<R>,
dwarf: &gimli::Dwarf<R>,
module_name: &str,
addr: *const u8,
len: usize,
pid: u32,
tid: u32,
) -> Result<()> {
let mut depth = 0;
let mut entries = unit.entries();
while let Some((delta_depth, entry)) = entries.next_dfs()? {
if self.dump_funcs {
let record_header = RecordHeader {
id: RecordId::JitCodeLoad as u32,
record_size: 0,
timestamp: 0,
};
let mut clr = CodeLoadRecord {
header: record_header,
pid: pid,
tid: tid,
virtual_address: 0,
address: 0,
size: 0,
index: 0,
};
let mut clr_name: String = String::from(module_name);
let mut get_debug_entry = false;
depth += delta_depth;
assert!(depth >= 0);
if entry.tag() == gimli::constants::DW_TAG_subprogram {
get_debug_entry = true;
let mut attrs = entry.attrs();
while let Some(attr) = attrs.next()? {
if let Some(n) = attr.name().static_string() {
if n == "DW_AT_low_pc" {
clr.address = match attr.value() {
gimli::AttributeValue::Addr(address) => address,
_ => 0,
};
clr.virtual_address = clr.address;
} else if n == "DW_AT_high_pc" {
clr.size = match attr.value() {
gimli::AttributeValue::Udata(data) => data,
_ => 0,
};
} else if n == "DW_AT_name" {
clr_name = match attr.value() {
gimli::AttributeValue::DebugStrRef(offset) => {
if let Ok(s) = dwarf.debug_str.get_str(offset) {
clr_name.push_str("::");
clr_name.push_str(&s.to_string_lossy()?);
clr_name
} else {
clr_name.push_str("::");
clr_name.push_str("?");
clr_name
}
}
_ => {
clr_name.push_str("??");
clr_name
}
};
}
}
}
}
if get_debug_entry {
// TODO: Temp check to make sure well only formed data is processed.
if clr.address == 0 {
continue;
}
// TODO: Temp check to make sure well only formed data is processed.
if clr_name == "?" {
continue;
}
if clr.address == 0 || clr.size == 0 {
clr.address = addr as u64;
clr.virtual_address = addr as u64;
clr.size = len as u64;
}
clr.header.record_size = mem::size_of::<CodeLoadRecord>() as u32
+ (clr_name.len() + 1) as u32
+ clr.size as u32;
clr.index = self.code_index;
self.code_index += 1;
self.dump_debug_info(&unit, &dwarf, clr.address, clr.size, None)?;
clr.header.timestamp = self.get_time_stamp();
unsafe {
let code_buffer: &[u8] =
std::slice::from_raw_parts(clr.address as *const u8, clr.size as usize);
let _ = self.write_code_load_record(&clr_name, clr, code_buffer);
}
}
} else {
let mut func_name: String = String::from("?");
let mut func_addr = 0;
let mut func_size = 0;
let mut get_debug_entry = false;
depth += delta_depth;
assert!(depth >= 0);
if entry.tag() == gimli::constants::DW_TAG_subprogram {
get_debug_entry = true;
let mut attrs = entry.attrs();
while let Some(attr) = attrs.next()? {
if let Some(n) = attr.name().static_string() {
if n == "DW_AT_low_pc" {
func_addr = match attr.value() {
gimli::AttributeValue::Addr(address) => address,
_ => 0,
};
} else if n == "DW_AT_high_pc" {
func_size = match attr.value() {
gimli::AttributeValue::Udata(data) => data,
_ => 0,
};
} else if n == "DW_AT_name" {
func_name = match attr.value() {
gimli::AttributeValue::DebugStrRef(offset) => {
if let Ok(s) = dwarf.debug_str.get_str(offset) {
func_name.clear();
func_name.push_str(&s.to_string_lossy()?);
func_name
} else {
func_name.push_str("?");
func_name
}
}
_ => {
func_name.push_str("??");
func_name
}
};
}
}
}
}
if get_debug_entry {
// TODO: Temp check to make sure well only formed data is processed.
if func_addr == 0 {
continue;
}
// TODO: Temp check to make sure well only formed data is processed.
if func_name == "?" {
continue;
}
self.dump_debug_info(
&unit,
&dwarf,
func_addr,
func_size,
Some(func_name.as_str()),
)?;
}
}
}
Ok(())
}
fn dump_debug_info<R: Reader>(
&mut self,
unit: &gimli::Unit<R>,
dwarf: &gimli::Dwarf<R>,
address: u64,
size: u64,
file_suffix: Option<&str>,
) -> Result<()> {
let timestamp = self.get_time_stamp();
if let Some(program) = unit.line_program.clone() {
let mut debug_info_record = DebugInfoRecord {
header: RecordHeader {
id: RecordId::JitCodeDebugInfo as u32,
record_size: 0,
timestamp: timestamp,
},
address: address,
count: 0,
};
let mut debug_entries = Vec::new();
let mut debug_entries_total_filenames_len = 0;
let mut rows = program.rows();
while let Some((header, row)) = rows.next_row()? {
let row_file_index = row.file_index() - 1;
let myfile = dwarf
.attr_string(
&unit,
header.file_names()[row_file_index as usize].path_name(),
)
.unwrap();
let filename = myfile.to_string_lossy()?;
let line = row.line().unwrap_or(0);
let column = match row.column() {
gimli::ColumnType::Column(column) => column,
gimli::ColumnType::LeftEdge => 0,
};
if (row.address() < address) || (row.address() > (address + size)) {
continue;
}
let mut debug_entry = DebugEntry {
address: row.address(),
line: line as u32,
discriminator: column as u32,
filename: filename.to_string(),
};
if let Some(suffix) = file_suffix {
debug_entry.filename.push_str("::");
debug_entry.filename.push_str(suffix);
}
debug_entries_total_filenames_len += debug_entry.filename.len() + 1;
debug_entries.push(debug_entry);
}
debug_info_record.count = debug_entries.len() as u64;
let debug_entries_size = (debug_info_record.count
* (mem::size_of::<DebugEntry>() as u64 - mem::size_of::<String>() as u64))
+ debug_entries_total_filenames_len as u64;
debug_info_record.header.record_size =
mem::size_of::<DebugInfoRecord>() as u32 + debug_entries_size as u32;
let _ = self.write_debug_info_record(debug_info_record);
let _ = self.write_debug_info_entries(debug_entries);
}
Ok(())
}
}
impl Drop for State {
fn drop(&mut self) {
unsafe {
libc::munmap(
self.map_addr as *mut _,
libc::sysconf(libc::_SC_PAGESIZE) as usize,
);
}
}
}
trait Reader: gimli::Reader<Offset = usize> + Send + Sync {}
impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where
Endian: gimli::Endianity + Send + Sync
{
}