Add support for generating perf maps for simple perf profiling (#6030)

* Add support for generating perf maps for simple perf profiling

* add missing enum entry in C code

* bugfix: use hexa when printing the code region's length too (thanks bjorn3!)

* sanitize file name + use bufwriter

* introduce --profile CLI flag for wasmtime

* Update doc and doc comments for new --profile option

* remove redundant FromStr import

* Apply review feedback: make_line receives a Write impl, report errors

* fix tests?

* better docs
This commit is contained in:
Benjamin Bouvier
2023-03-20 17:17:36 +01:00
committed by GitHub
parent b5a2d536ac
commit 6f4f30c840
14 changed files with 224 additions and 38 deletions

View File

@@ -80,6 +80,10 @@ enum wasmtime_profiling_strategy_enum { // ProfilingStrategy
///
/// Note that this isn't always enabled at build time.
WASMTIME_PROFILING_STRATEGY_VTUNE,
/// Linux's simple "perfmap" support in `perf` is enabled and when Wasmtime is
/// run under `perf` necessary calls will be made to profile generated JIT
/// code.
WASMTIME_PROFILING_STRATEGY_PERFMAP,
};
#define WASMTIME_CONFIG_PROP(ret, name, ty) \

View File

@@ -36,6 +36,7 @@ pub enum wasmtime_profiling_strategy_t {
WASMTIME_PROFILING_STRATEGY_NONE,
WASMTIME_PROFILING_STRATEGY_JITDUMP,
WASMTIME_PROFILING_STRATEGY_VTUNE,
WASMTIME_PROFILING_STRATEGY_PERFMAP,
}
#[no_mangle]
@@ -157,6 +158,7 @@ pub extern "C" fn wasmtime_config_profiler_set(
WASMTIME_PROFILING_STRATEGY_NONE => ProfilingStrategy::None,
WASMTIME_PROFILING_STRATEGY_JITDUMP => ProfilingStrategy::JitDump,
WASMTIME_PROFILING_STRATEGY_VTUNE => ProfilingStrategy::VTune,
WASMTIME_PROFILING_STRATEGY_PERFMAP => ProfilingStrategy::PerfMap,
});
}

View File

@@ -68,18 +68,6 @@ pub const SUPPORTED_WASI_MODULES: &[(&str, &str)] = &[
),
];
/// Maps the `--jitdump` / `--vtune` boolean flags to a single profiling strategy.
///
/// If both flags are set, a notice is printed to stdout and profiling is
/// disabled. This function never returns `Err`.
fn pick_profiling_strategy(jitdump: bool, vtune: bool) -> Result<ProfilingStrategy> {
    // Guard: the two profilers cannot be active together.
    if jitdump && vtune {
        println!("Can't enable --jitdump and --vtune at the same time. Profiling not enabled.");
        return Ok(ProfilingStrategy::None);
    }

    let strategy = if jitdump {
        ProfilingStrategy::JitDump
    } else if vtune {
        ProfilingStrategy::VTune
    } else {
        ProfilingStrategy::None
    };
    Ok(strategy)
}
fn init_file_per_thread_logger(prefix: &'static str) {
file_per_thread_logger::initialize(prefix);
@@ -142,14 +130,11 @@ pub struct CommonOptions {
#[clap(long, value_name = "MODULE,MODULE,...", parse(try_from_str = parse_wasi_modules))]
pub wasi_modules: Option<WasiModules>,
/// Profiling strategy (valid options are: perfmap, jitdump, vtune)
#[clap(long)]
pub profile: Option<ProfilingStrategy>,
/// Generate jitdump file (supported on --features=profiling build)
#[clap(long, conflicts_with = "vtune")]
pub jitdump: bool,
/// Generate vtune (supported on --features=vtune build)
#[clap(long, conflicts_with = "jitdump")]
pub vtune: bool,
/// Run optimization passes on translated functions, on by default
#[clap(short = 'O', long)]
pub optimize: bool,
@@ -283,7 +268,7 @@ impl CommonOptions {
.cranelift_debug_verifier(self.enable_cranelift_debug_verifier)
.debug_info(self.debug_info)
.cranelift_opt_level(self.opt_level())
.profiler(pick_profiling_strategy(self.jitdump, self.vtune)?)
.profiler(self.profile.unwrap_or(ProfilingStrategy::None))
.cranelift_nan_canonicalization(self.enable_cranelift_nan_canonicalization);
self.enable_wasm_features(&mut config);

View File

@@ -4,7 +4,7 @@
//!
//! Usage Example:
//! Record
//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --jitdump test.wasm
//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm
//! Combine
//! sudo perf inject -v -j -i perf.data -o perf.jit.data
//! Report

View File

@@ -11,6 +11,16 @@ cfg_if::cfg_if! {
}
}
cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
#[path = "profiling/perfmap_linux.rs"]
mod perfmap;
} else {
#[path = "profiling/perfmap_disabled.rs"]
mod perfmap;
}
}
cfg_if::cfg_if! {
// Note: VTune support is disabled on windows mingw because the ittapi crate doesn't compile
// there; see also https://github.com/bytecodealliance/wasmtime/pull/4003 for rationale.
@@ -24,6 +34,7 @@ cfg_if::cfg_if! {
}
pub use jitdump::JitDumpAgent;
pub use perfmap::PerfMapAgent;
pub use vtune::VTuneAgent;
/// Common interface for profiling tools.

View File

@@ -8,7 +8,7 @@ pub struct JitDumpAgent {
}
impl JitDumpAgent {
/// Intialize a JitDumpAgent and write out the header
/// Initialize a dummy JitDumpAgent that will fail upon instantiation.
pub fn new() -> Result<Self> {
if cfg!(feature = "jitdump") {
bail!("jitdump is not supported on this platform");

View File

@@ -4,7 +4,7 @@
//!
//! Usage Example:
//! Record
//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --jitdump test.wasm
//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm
//! Combine
//! sudo perf inject -v -j -i perf.data -o perf.jit.data
//! Report

View File

@@ -0,0 +1,28 @@
use crate::{CompiledModule, ProfilingAgent};
use anyhow::{bail, Result};
/// Placeholder perf map profiling agent whose constructor always fails.
///
/// It keeps the `PerfMapAgent` name available so callers compile unchanged.
#[derive(Debug)]
pub struct PerfMapAgent {
// Private unit field: the struct cannot be built with a struct literal
// outside this module.
_private: (),
}
impl PerfMapAgent {
/// Initialize a dummy `PerfMapAgent`; this always fails.
///
/// # Errors
///
/// Always returns an error stating that perf map support is not available
/// on this platform.
pub fn new() -> Result<Self> {
bail!("perfmap support not supported on this platform");
}
}
/// No-op `ProfilingAgent` implementation: both hooks ignore their arguments
/// and do nothing.
impl ProfilingAgent for PerfMapAgent {
    /// Does nothing.
    fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {}

    /// Does nothing.
    fn load_single_trampoline(
        &self,
        _name: &str,
        _addr: *const u8,
        _size: usize,
        _pid: u32,
        _tid: u32,
    ) {
    }
}

View File

@@ -0,0 +1,85 @@
use crate::{CompiledModule, ProfilingAgent};
use anyhow::Result;
use std::io::{self, BufWriter, Write};
use std::process;
use std::{fs::File, sync::Mutex};
use wasmtime_environ::EntityRef as _;
/// Process-wide perf map file, shared by every `PerfMapAgent`.
///
/// `perf` only reads a single map file per process, so the handle lives in a
/// static behind a mutex. It starts out as `None`.
static PERFMAP_FILE: Mutex<Option<File>> = Mutex::new(None);
/// Profiling agent that writes code-region-to-symbol-name lines to the
/// process-wide perf map file.
pub struct PerfMapAgent;
impl PerfMapAgent {
/// Creates a `PerfMapAgent`, opening the process-wide perf map file on first use.
///
/// The first call creates `/tmp/perf-<pid>.map`; later calls reuse the file
/// that is already open.
///
/// # Errors
///
/// Returns an error if the map file cannot be created.
///
/// # Panics
///
/// Panics if the mutex guarding the map file is poisoned.
pub fn new() -> Result<Self> {
    let mut slot = PERFMAP_FILE.lock().unwrap();
    // Another agent already opened the file: nothing left to do.
    if slot.is_some() {
        return Ok(PerfMapAgent);
    }
    let path = format!("/tmp/perf-{}.map", process::id());
    *slot = Some(File::create(path)?);
    Ok(PerfMapAgent)
}
/// Writes one perf map entry, `<addr> <len> <name>` followed by a newline.
///
/// `addr` and `len` are printed in hexadecimal without a `0x` prefix. Line
/// breaks (`\n`, `\r`) in `name` are replaced by `_` so that an entry always
/// occupies a single line.
///
/// # Errors
///
/// Propagates any I/O error reported by `writer`.
fn make_line(
    writer: &mut dyn Write,
    name: &str,
    addr: *const u8,
    len: usize,
) -> io::Result<()> {
    // Format is documented here: https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt
    // Wasm allows any UTF-8 string as a name, so strip the characters that
    // would break the line-oriented format.
    let symbol = name.replace(|c: char| c == '\n' || c == '\r', "_");
    let start = addr as usize;
    writeln!(writer, "{:x} {:x} {}", start, len, symbol)
}
}
impl ProfilingAgent for PerfMapAgent {
    /// Records every finished function and export trampoline of `module` in
    /// the perf map file.
    ///
    /// Write errors are printed to stderr and stop further writes for this
    /// module; they are not returned to the caller.
    ///
    /// # Panics
    ///
    /// Panics if the mutex guarding the map file is poisoned.
    fn module_load(&self, module: &CompiledModule, _dbg_image: Option<&[u8]>) {
        let mut guard = PERFMAP_FILE.lock().unwrap();
        let file = guard
            .as_mut()
            .expect("perf map file is opened by PerfMapAgent::new");
        // Buffer the writes so the whole module goes out in few syscalls.
        let mut file = BufWriter::new(file);

        for (idx, func) in module.finished_functions() {
            let addr = func.as_ptr();
            let len = func.len();
            let name = super::debug_name(module, idx);
            if let Err(err) = Self::make_line(&mut file, &name, addr, len) {
                eprintln!("Error when writing function info to the perf map file: {err}");
                return;
            }
        }

        // Note: these are the trampolines into exported functions.
        for (idx, func, len) in module.trampolines() {
            let addr = func as usize as *const u8;
            let name = format!("wasm::trampoline[{}]", idx.index());
            if let Err(err) = Self::make_line(&mut file, &name, addr, len) {
                eprintln!("Error when writing export trampoline info to the perf map file: {err}");
                return;
            }
        }

        // Flush explicitly: dropping a `BufWriter` discards flush errors.
        if let Err(err) = file.flush() {
            eprintln!("Error when flushing the perf map file buffer: {err}");
        }
    }

    /// Records a single trampoline in the perf map file.
    ///
    /// `_pid` and `_tid` are unused. Write errors are printed to stderr and
    /// are not returned to the caller.
    ///
    /// # Panics
    ///
    /// Panics if the mutex guarding the map file is poisoned.
    fn load_single_trampoline(
        &self,
        name: &str,
        addr: *const u8,
        size: usize,
        _pid: u32,
        _tid: u32,
    ) {
        let mut guard = PERFMAP_FILE.lock().unwrap();
        let file = guard
            .as_mut()
            .expect("perf map file is opened by PerfMapAgent::new");
        // Go through a buffer, as `module_load` does, so the formatted line
        // is handed to the file in one piece instead of several small writes.
        let mut file = BufWriter::new(file);
        let written = Self::make_line(&mut file, name, addr, size).and_then(|()| file.flush());
        if let Err(err) = written {
            eprintln!("Error when writing import trampoline info to the perf map file: {err}");
        }
    }
}

View File

@@ -1,11 +1,11 @@
//! Adds support for profiling JIT-ed code using VTune. By default, VTune
//! support is built in to Wasmtime (configure with the `vtune` feature flag).
//! To enable it at runtime, use the `--vtune` CLI flag.
//! To enable it at runtime, use the `--profile=vtune` CLI flag.
//!
//! ### Profile
//!
//! ```ignore
//! vtune -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --vtune test.wasm
//! vtune -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --profile=vtune test.wasm
//! ```
//!
//! Note: `vtune` is a command-line tool for VTune which must [be

View File

@@ -8,7 +8,7 @@ pub struct VTuneAgent {
}
impl VTuneAgent {
/// Intialize a VTuneAgent and write out the header
/// Initialize a dummy VTuneAgent that will fail upon instantiation.
pub fn new() -> Result<Self> {
if cfg!(feature = "vtune") {
bail!("VTune is not supported on this platform.");

View File

@@ -6,13 +6,14 @@ use std::collections::{HashMap, HashSet};
use std::fmt;
#[cfg(feature = "cache")]
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use target_lexicon::Architecture;
use wasmparser::WasmFeatures;
#[cfg(feature = "cache")]
use wasmtime_cache::CacheConfig;
use wasmtime_environ::Tunables;
use wasmtime_jit::{JitDumpAgent, NullProfilerAgent, ProfilingAgent, VTuneAgent};
use wasmtime_jit::{JitDumpAgent, NullProfilerAgent, PerfMapAgent, ProfilingAgent, VTuneAgent};
use wasmtime_runtime::{InstanceAllocator, OnDemandInstanceAllocator, RuntimeMemoryCreator};
pub use wasmtime_environ::CacheStore;
@@ -221,7 +222,6 @@ impl Config {
#[cfg(compiler)]
#[cfg_attr(nightlydoc, doc(cfg(feature = "cranelift")))] // see build.rs
pub fn target(&mut self, target: &str) -> Result<&mut Self> {
use std::str::FromStr;
self.compiler_config.target =
Some(target_lexicon::Triple::from_str(target).map_err(|e| anyhow::anyhow!(e))?);
@@ -1536,6 +1536,7 @@ impl Config {
pub(crate) fn build_profiler(&self) -> Result<Box<dyn ProfilingAgent>> {
Ok(match self.profiling_strategy {
ProfilingStrategy::PerfMap => Box::new(PerfMapAgent::new()?) as Box<dyn ProfilingAgent>,
ProfilingStrategy::JitDump => Box::new(JitDumpAgent::new()?) as Box<dyn ProfilingAgent>,
ProfilingStrategy::VTune => Box::new(VTuneAgent::new()?) as Box<dyn ProfilingAgent>,
ProfilingStrategy::None => Box::new(NullProfilerAgent),
@@ -1727,11 +1728,14 @@ pub enum OptLevel {
}
/// Select which profiling technique to support.
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ProfilingStrategy {
/// No profiler support.
None,
/// Collect function name information as the "perf map" file format, used with `perf` on Linux.
PerfMap,
/// Collect profiling info for "jitdump" file format, used with `perf` on
/// Linux.
JitDump,
@@ -1740,6 +1744,20 @@ pub enum ProfilingStrategy {
VTune,
}
impl FromStr for ProfilingStrategy {
    type Err = anyhow::Error;

    /// Parses a profiling strategy from its lowercase name.
    ///
    /// Accepted values are `none`, `perfmap`, `jitdump` and `vtune`; matching
    /// is exact and case-sensitive.
    ///
    /// # Errors
    ///
    /// Returns an error naming the rejected value and the accepted ones when
    /// `s` is anything else.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        match s {
            "none" => Ok(Self::None),
            "perfmap" => Ok(Self::PerfMap),
            "jitdump" => Ok(Self::JitDump),
            "vtune" => Ok(Self::VTune),
            other => anyhow::bail!(
                "unknown value for profiling strategy: `{}` (valid options are: none, perfmap, jitdump, vtune)",
                other
            ),
        }
    }
}
/// Select how wasm backtrace detailed information is handled.
#[derive(Debug, Clone, Copy)]
pub enum WasmBacktraceDetails {

View File

@@ -6,6 +6,59 @@ an extremely powerful profiler with lots of documentation on the web, but for
the rest of this section we'll assume you're running on Linux and already have
`perf` installed.
There are two profiling agents for `perf`:
- a very simple one that will map code regions to symbol names: `perfmap`.
- a more detailed one that can provide additional information and mappings between the source
language statements and generated JIT code: `jitdump`.
## Profiling with `perfmap`
Simple profiling support with `perf` generates a "perf map" file that the `perf` CLI
automatically looks for when it runs into unresolved symbols. This requires runtime support from
Wasmtime itself, so you will need to manually change a few things to enable profiling support in
your application. Enabling runtime support depends on how you're using Wasmtime:
* **Rust API** - you'll want to call the [`Config::profiler`] method with
`ProfilingStrategy::PerfMap` to enable profiling of your wasm modules.
* **C API** - you'll want to call the `wasmtime_config_profiler_set` API with a
`WASMTIME_PROFILING_STRATEGY_PERFMAP` value.
* **Command Line** - you'll want to pass the `--profile=perfmap` flag on the command
line.
Once perfmap support is enabled, you'll use `perf record` like usual to record
your application's performance.
For example if you're using the CLI, you'll execute:
```sh
$ perf record -k mono wasmtime --profile=perfmap foo.wasm
```
This will create a `perf.data` file as per usual, but it will *also* create a
`/tmp/perf-XXXX.map` file. This extra `.map` file is the perf map file, whose
format is specified by `perf` and which Wasmtime generates at runtime.
After that you can explore the `perf.data` profile as you usually would, for example with:
```sh
$ perf report --input perf.data
```
You should be able to see time spent in wasm functions, generate flamegraphs based on that, etc.
You should also see entries for wasm functions show up as one function and the name of each
function matches the debug name section in the wasm file.
Note that support for perfmap is still relatively new in Wasmtime, so if you
have any problems, please don't hesitate to [file an issue]!
[file an issue]: https://github.com/bytecodealliance/wasmtime/issues/new
## Profiling with `jitdump`
Profiling support with `perf` uses the "jitdump" support in the `perf` CLI. This
requires runtime support from Wasmtime itself, so you will need to manually
change a few things to enable profiling support in your application. First
@@ -19,7 +72,7 @@ depends on how you're using Wasmtime:
* **C API** - you'll want to call the `wasmtime_config_profiler_set` API with a
`WASMTIME_PROFILING_STRATEGY_JITDUMP` value.
* **Command Line** - you'll want to pass the `--jitdump` flag on the command
* **Command Line** - you'll want to pass the `--profile=jitdump` flag on the command
line.
Once jitdump support is enabled, you'll use `perf record` like usual to record
@@ -29,7 +82,7 @@ your application's performance. You'll need to also be sure to pass the
For example if you're using the CLI, you'll execute:
```sh
$ perf record -k mono wasmtime --jitdump foo.wasm
$ perf record -k mono wasmtime --profile=jitdump foo.wasm
```
This will create a `perf.data` file as per usual, but it will *also* create a
@@ -110,7 +163,7 @@ To collect perf information for this wasm module we'll execute:
```sh
$ rustc --target wasm32-wasi fib.rs -O
$ perf record -k mono wasmtime --jitdump fib.wasm
$ perf record -k mono wasmtime --profile=jitdump fib.wasm
fib(42) = 267914296
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.147 MB perf.data (3435 samples) ]

View File

@@ -39,7 +39,7 @@ runtime--enable runtime support based on how you use Wasmtime:
* **C API** - call the `wasmtime_config_profiler_set` API with a
`WASMTIME_PROFILING_STRATEGY_VTUNE` value.
* **Command Line** - pass the `--vtune` flag on the command line.
* **Command Line** - pass the `--profile=vtune` flag on the command line.
### Profiling Wasmtime itself
@@ -58,11 +58,11 @@ With VTune [properly installed][download], if you are using the CLI execute:
```sh
$ cargo build
$ vtune -run-pass-thru=--no-altstack -collect hotspots target/debug/wasmtime --vtune foo.wasm
$ vtune -run-pass-thru=--no-altstack -collect hotspots target/debug/wasmtime --profile=vtune foo.wasm
```
This command tells the VTune collector (`vtune`) to collect hot spot
profiling data as Wasmtime is executing `foo.wasm`. The `--vtune` flag enables
profiling data as Wasmtime is executing `foo.wasm`. The `--profile=vtune` flag enables
VTune support in Wasmtime so that the collector is also alerted to JIT events
that take place during runtime. The first time this is run, the result of the
command is a results directory `r000hs/` which contains profiling data for
@@ -96,13 +96,13 @@ $ rustc --target wasm32-wasi fib.rs -C opt-level=z -C lto=yes
```
Then we execute the Wasmtime runtime (built with the `vtune` feature and
executed with the `--vtune` flag to enable reporting) inside the VTune CLI
executed with the `--profile=vtune` flag to enable reporting) inside the VTune CLI
application, `vtune`, which must already be installed and available on the
path. To collect hot spot profiling information, we execute:
```sh
$ rustc --target wasm32-wasi fib.rs -C opt-level=z -C lto=yes
$ vtune -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --vtune fib.wasm
$ vtune -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --profile=vtune fib.wasm
fib(45) = 1134903170
amplxe: Collection stopped.
amplxe: Using result path /home/jlb6740/wasmtime/r000hs
@@ -141,7 +141,7 @@ like:
- Open VTune Profiler
- "Configure Analysis" with
- "Application" set to `/path/to/wasmtime` (e.g., `target/debug/wasmtime`)
- "Application parameters" set to `--vtune /path/to/module.wasm`
- "Application parameters" set to `--profile=vtune /path/to/module.wasm`
- "Working directory" set as appropriate
- Enable "Hardware Event-Based Sampling," which may require some system
configuration, e.g. `sysctl -w kernel.perf_event_paranoid=0`