diff --git a/crates/c-api/include/wasmtime/config.h b/crates/c-api/include/wasmtime/config.h index 951004e96a..7a41bb38ef 100644 --- a/crates/c-api/include/wasmtime/config.h +++ b/crates/c-api/include/wasmtime/config.h @@ -80,6 +80,10 @@ enum wasmtime_profiling_strategy_enum { // ProfilingStrategy /// /// Note that this isn't always enabled at build time. WASMTIME_PROFILING_STRATEGY_VTUNE, + /// Linux's simple "perfmap" support in `perf` is enabled and when Wasmtime is + /// run under `perf` necessary calls will be made to profile generated JIT + /// code. + WASMTIME_PROFILING_STRATEGY_PERFMAP, }; #define WASMTIME_CONFIG_PROP(ret, name, ty) \ diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index 275730f239..bb065b3407 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -36,6 +36,7 @@ pub enum wasmtime_profiling_strategy_t { WASMTIME_PROFILING_STRATEGY_NONE, WASMTIME_PROFILING_STRATEGY_JITDUMP, WASMTIME_PROFILING_STRATEGY_VTUNE, + WASMTIME_PROFILING_STRATEGY_PERFMAP, } #[no_mangle] @@ -157,6 +158,7 @@ pub extern "C" fn wasmtime_config_profiler_set( WASMTIME_PROFILING_STRATEGY_NONE => ProfilingStrategy::None, WASMTIME_PROFILING_STRATEGY_JITDUMP => ProfilingStrategy::JitDump, WASMTIME_PROFILING_STRATEGY_VTUNE => ProfilingStrategy::VTune, + WASMTIME_PROFILING_STRATEGY_PERFMAP => ProfilingStrategy::PerfMap, }); } diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index cb9f0ef82d..1bb053b95a 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -68,18 +68,6 @@ pub const SUPPORTED_WASI_MODULES: &[(&str, &str)] = &[ ), ]; -fn pick_profiling_strategy(jitdump: bool, vtune: bool) -> Result { - Ok(match (jitdump, vtune) { - (true, false) => ProfilingStrategy::JitDump, - (false, true) => ProfilingStrategy::VTune, - (true, true) => { - println!("Can't enable --jitdump and --vtune at the same time. 
Profiling not enabled."); - ProfilingStrategy::None - } - _ => ProfilingStrategy::None, - }) -} - fn init_file_per_thread_logger(prefix: &'static str) { file_per_thread_logger::initialize(prefix); @@ -142,14 +130,11 @@ pub struct CommonOptions { #[clap(long, value_name = "MODULE,MODULE,...", parse(try_from_str = parse_wasi_modules))] pub wasi_modules: Option, + /// Profiling strategy (valid options are: perfmap, jitdump, vtune) + #[clap(long)] + pub profile: Option, + /// Generate jitdump file (supported on --features=profiling build) - #[clap(long, conflicts_with = "vtune")] - pub jitdump: bool, - - /// Generate vtune (supported on --features=vtune build) - #[clap(long, conflicts_with = "jitdump")] - pub vtune: bool, - /// Run optimization passes on translated functions, on by default #[clap(short = 'O', long)] pub optimize: bool, @@ -283,7 +268,7 @@ impl CommonOptions { .cranelift_debug_verifier(self.enable_cranelift_debug_verifier) .debug_info(self.debug_info) .cranelift_opt_level(self.opt_level()) - .profiler(pick_profiling_strategy(self.jitdump, self.vtune)?) + .profiler(self.profile.unwrap_or(ProfilingStrategy::None)) .cranelift_nan_canonicalization(self.enable_cranelift_nan_canonicalization); self.enable_wasm_features(&mut config); diff --git a/crates/jit-debug/src/perf_jitdump.rs b/crates/jit-debug/src/perf_jitdump.rs index 86f49987d2..b5f3666dd5 100644 --- a/crates/jit-debug/src/perf_jitdump.rs +++ b/crates/jit-debug/src/perf_jitdump.rs @@ -4,7 +4,7 @@ //! //! Usage Example: //! Record -//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --jitdump test.wasm +//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm //! Combine //! sudo perf inject -v -j -i perf.data -o perf.jit.data //! Report diff --git a/crates/jit/src/profiling.rs b/crates/jit/src/profiling.rs index a20db14614..e158a58c92 100644 --- a/crates/jit/src/profiling.rs +++ b/crates/jit/src/profiling.rs @@ -11,6 +11,16 @@ cfg_if::cfg_if! 
{ } } +cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + #[path = "profiling/perfmap_linux.rs"] + mod perfmap; + } else { + #[path = "profiling/perfmap_disabled.rs"] + mod perfmap; + } +} + cfg_if::cfg_if! { // Note: VTune support is disabled on windows mingw because the ittapi crate doesn't compile // there; see also https://github.com/bytecodealliance/wasmtime/pull/4003 for rationale. @@ -24,6 +34,7 @@ cfg_if::cfg_if! { } pub use jitdump::JitDumpAgent; +pub use perfmap::PerfMapAgent; pub use vtune::VTuneAgent; /// Common interface for profiling tools. diff --git a/crates/jit/src/profiling/jitdump_disabled.rs b/crates/jit/src/profiling/jitdump_disabled.rs index f88d76a58c..16dd501640 100644 --- a/crates/jit/src/profiling/jitdump_disabled.rs +++ b/crates/jit/src/profiling/jitdump_disabled.rs @@ -8,7 +8,7 @@ pub struct JitDumpAgent { } impl JitDumpAgent { - /// Intialize a JitDumpAgent and write out the header + /// Initialize a dummy JitDumpAgent that will fail upon instantiation. pub fn new() -> Result { if cfg!(feature = "jitdump") { bail!("jitdump is not supported on this platform"); diff --git a/crates/jit/src/profiling/jitdump_linux.rs b/crates/jit/src/profiling/jitdump_linux.rs index 4dcab8bc6f..1878aa7f06 100644 --- a/crates/jit/src/profiling/jitdump_linux.rs +++ b/crates/jit/src/profiling/jitdump_linux.rs @@ -4,7 +4,7 @@ //! //! Usage Example: //! Record -//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --jitdump test.wasm +//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm //! Combine //! sudo perf inject -v -j -i perf.data -o perf.jit.data //!
Report diff --git a/crates/jit/src/profiling/perfmap_disabled.rs b/crates/jit/src/profiling/perfmap_disabled.rs new file mode 100644 index 0000000000..07c50ca0ea --- /dev/null +++ b/crates/jit/src/profiling/perfmap_disabled.rs @@ -0,0 +1,28 @@ +use crate::{CompiledModule, ProfilingAgent}; +use anyhow::{bail, Result}; + +/// Stand-in for the perf map profiling agent on platforms where it is unavailable +#[derive(Debug)] +pub struct PerfMapAgent { + _private: (), +} + +impl PerfMapAgent { + /// Initialize a dummy PerfMapAgent that will fail upon instantiation. + pub fn new() -> Result { + bail!("perfmap support not supported on this platform"); + } +} + +impl ProfilingAgent for PerfMapAgent { + fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {} + fn load_single_trampoline( + &self, + _name: &str, + _addr: *const u8, + _size: usize, + __pid: u32, + _tid: u32, + ) { + } +} diff --git a/crates/jit/src/profiling/perfmap_linux.rs b/crates/jit/src/profiling/perfmap_linux.rs new file mode 100644 index 0000000000..fa1e23ac9d --- /dev/null +++ b/crates/jit/src/profiling/perfmap_linux.rs @@ -0,0 +1,85 @@ +use crate::{CompiledModule, ProfilingAgent}; +use anyhow::Result; +use std::io::{self, BufWriter, Write}; +use std::process; +use std::{fs::File, sync::Mutex}; +use wasmtime_environ::EntityRef as _; + +/// Process-wide perf map file. Perf only reads a unique file per process. +static PERFMAP_FILE: Mutex> = Mutex::new(None); + +/// Interface for driving the creation of perf map files +pub struct PerfMapAgent; + +impl PerfMapAgent { + /// Initialize a PerfMapAgent, creating the process-wide perf map file if it does not exist yet.
+ pub fn new() -> Result { + let mut file = PERFMAP_FILE.lock().unwrap(); + if file.is_none() { + let filename = format!("/tmp/perf-{}.map", process::id()); + *file = Some(File::create(filename)?); + } + Ok(PerfMapAgent) + } + + fn make_line( + writer: &mut dyn Write, + name: &str, + addr: *const u8, + len: usize, + ) -> io::Result<()> { + // Format is documented here: https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt + // Try our best to sanitize the name, since wasm allows for any utf8 string in there. + let sanitized_name = name.replace('\n', "_").replace('\r', "_"); + write!(writer, "{:x} {:x} {}\n", addr as usize, len, sanitized_name)?; + Ok(()) + } +} + +impl ProfilingAgent for PerfMapAgent { + /// Sent when a method is compiled and loaded into memory by the VM. + fn module_load(&self, module: &CompiledModule, _dbg_image: Option<&[u8]>) { + let mut file = PERFMAP_FILE.lock().unwrap(); + let file = file.as_mut().unwrap(); + let mut file = BufWriter::new(file); + + for (idx, func) in module.finished_functions() { + let addr = func.as_ptr(); + let len = func.len(); + let name = super::debug_name(module, idx); + if let Err(err) = Self::make_line(&mut file, &name, addr, len) { + eprintln!("Error when writing function info to the perf map file: {err}"); + return; + } + } + + // Note: these are the trampolines into exported functions. 
+ for (idx, func, len) in module.trampolines() { + let (addr, len) = (func as usize as *const u8, len); + let name = format!("wasm::trampoline[{}]", idx.index()); + if let Err(err) = Self::make_line(&mut file, &name, addr, len) { + eprintln!("Error when writing export trampoline info to the perf map file: {err}"); + return; + } + } + + if let Err(err) = file.flush() { + eprintln!("Error when flushing the perf map file buffer: {err}"); + } + } + + fn load_single_trampoline( + &self, + name: &str, + addr: *const u8, + size: usize, + _pid: u32, + _tid: u32, + ) { + let mut file = PERFMAP_FILE.lock().unwrap(); + let file = file.as_mut().unwrap(); + if let Err(err) = Self::make_line(file, name, addr, size) { + eprintln!("Error when writing import trampoline info to the perf map file: {err}"); + } + } +} diff --git a/crates/jit/src/profiling/vtune.rs b/crates/jit/src/profiling/vtune.rs index b99511110b..64699ed30f 100644 --- a/crates/jit/src/profiling/vtune.rs +++ b/crates/jit/src/profiling/vtune.rs @@ -1,11 +1,11 @@ //! Adds support for profiling JIT-ed code using VTune. By default, VTune //! support is built in to Wasmtime (configure with the `vtune` feature flag). -//! To enable it at runtime, use the `--vtune` CLI flag. +//! To enable it at runtime, use the `--profile=vtune` CLI flag. //! //! ### Profile //! //! ```ignore -//! vtune -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --vtune test.wasm +//! vtune -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --profile=vtune test.wasm //! ``` //! //! 
Note: `vtune` is a command-line tool for VTune which must [be diff --git a/crates/jit/src/profiling/vtune_disabled.rs b/crates/jit/src/profiling/vtune_disabled.rs index 01da215919..34c6871276 100644 --- a/crates/jit/src/profiling/vtune_disabled.rs +++ b/crates/jit/src/profiling/vtune_disabled.rs @@ -8,7 +8,7 @@ pub struct VTuneAgent { } impl VTuneAgent { - /// Intialize a VTuneAgent and write out the header + /// Initialize a dummy VTuneAgent that will fail upon instantiation. pub fn new() -> Result { if cfg!(feature = "vtune") { bail!("VTune is not supported on this platform."); diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 2095281e07..0d314823dd 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -6,13 +6,14 @@ use std::collections::{HashMap, HashSet}; use std::fmt; #[cfg(feature = "cache")] use std::path::Path; +use std::str::FromStr; use std::sync::Arc; use target_lexicon::Architecture; use wasmparser::WasmFeatures; #[cfg(feature = "cache")] use wasmtime_cache::CacheConfig; use wasmtime_environ::Tunables; -use wasmtime_jit::{JitDumpAgent, NullProfilerAgent, ProfilingAgent, VTuneAgent}; +use wasmtime_jit::{JitDumpAgent, NullProfilerAgent, PerfMapAgent, ProfilingAgent, VTuneAgent}; use wasmtime_runtime::{InstanceAllocator, OnDemandInstanceAllocator, RuntimeMemoryCreator}; pub use wasmtime_environ::CacheStore; @@ -221,7 +222,6 @@ impl Config { #[cfg(compiler)] #[cfg_attr(nightlydoc, doc(cfg(feature = "cranelift")))] // see build.rs pub fn target(&mut self, target: &str) -> Result<&mut Self> { - use std::str::FromStr; self.compiler_config.target = Some(target_lexicon::Triple::from_str(target).map_err(|e| anyhow::anyhow!(e))?); @@ -1536,6 +1536,7 @@ impl Config { pub(crate) fn build_profiler(&self) -> Result> { Ok(match self.profiling_strategy { + ProfilingStrategy::PerfMap => Box::new(PerfMapAgent::new()?) as Box, ProfilingStrategy::JitDump => Box::new(JitDumpAgent::new()?)
as Box, ProfilingStrategy::VTune => Box::new(VTuneAgent::new()?) as Box, ProfilingStrategy::None => Box::new(NullProfilerAgent), @@ -1727,11 +1728,14 @@ pub enum OptLevel { } /// Select which profiling technique to support. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum ProfilingStrategy { /// No profiler support. None, + /// Collect function name information as the "perf map" file format, used with `perf` on Linux. + PerfMap, + /// Collect profiling info for "jitdump" file format, used with `perf` on /// Linux. JitDump, @@ -1740,6 +1744,20 @@ pub enum ProfilingStrategy { VTune, } +impl FromStr for ProfilingStrategy { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + "none" => Ok(Self::None), + "perfmap" => Ok(Self::PerfMap), + "jitdump" => Ok(Self::JitDump), + "vtune" => Ok(Self::VTune), + _ => anyhow::bail!("unknown value for profiling strategy"), + } + } +} + /// Select how wasm backtrace detailed information is handled. #[derive(Debug, Clone, Copy)] pub enum WasmBacktraceDetails { diff --git a/docs/examples-profiling-perf.md b/docs/examples-profiling-perf.md index 40dd231854..63317aeddf 100644 --- a/docs/examples-profiling-perf.md +++ b/docs/examples-profiling-perf.md @@ -6,6 +6,59 @@ an extremely powerful profiler with lots of documentation on the web, but for the rest of this section we'll assume you're running on Linux and already have `perf` installed. +There are two profiling agents for `perf`: + +- a very simple one that will map code regions to symbol names: `perfmap`. +- a more detailed one that can provide additional information and mappings between the source + language statements and generated JIT code: `jitdump`. + +## Profiling with `perfmap` + +Simple profiling support with `perf` generates a "perf map" file that the `perf` CLI will +automatically look for, when running into unresolved symbols. 
This requires runtime support from +Wasmtime itself, so you will need to manually change a few things to enable profiling support in +your application. Enabling runtime support depends on how you're using Wasmtime: + +* **Rust API** - you'll want to call the [`Config::profiler`] method with + `ProfilingStrategy::PerfMap` to enable profiling of your wasm modules. + +* **C API** - you'll want to call the `wasmtime_config_profiler_set` API with a + `WASMTIME_PROFILING_STRATEGY_PERFMAP` value. + +* **Command Line** - you'll want to pass the `--profile=perfmap` flag on the command + line. + +Once perfmap support is enabled, you'll use `perf record` like usual to record +your application's performance. + +For example, if you're using the CLI, you'll execute: + +```sh +$ perf record -k mono wasmtime --profile=perfmap foo.wasm +``` + +This will create a `perf.data` file as per usual, but it will *also* create a +`/tmp/perf-XXXX.map` file, where `XXXX` is the process ID. This extra `.map` file is the perf map file, whose format is +specified by `perf` and which Wasmtime generates at runtime. + +After that you can explore the `perf.data` profile as you usually would, for example with: + +```sh +$ perf report --input perf.data +``` + +You should be able to see time spent in wasm functions, generate flamegraphs based on that, etc. +You should also see each wasm function show up as its own entry, with a name that +matches the debug name section in the wasm file. + +Note that support for perfmap is still relatively new in Wasmtime, so if you +have any problems, please don't hesitate to [file an issue]! + +[file an issue]: https://github.com/bytecodealliance/wasmtime/issues/new + + +## Profiling with `jitdump` + Profiling support with `perf` uses the "jitdump" support in the `perf` CLI. This requires runtime support from Wasmtime itself, so you will need to manually change a few things to enable profiling support in your application.
First @@ -19,7 +72,7 @@ depends on how you're using Wasmtime: * **C API** - you'll want to call the `wasmtime_config_profiler_set` API with a `WASMTIME_PROFILING_STRATEGY_JITDUMP` value. -* **Command Line** - you'll want to pass the `--jitdump` flag on the command +* **Command Line** - you'll want to pass the `--profile=jitdump` flag on the command line. Once jitdump support is enabled, you'll use `perf record` like usual to record @@ -29,7 +82,7 @@ your application's performance. You'll need to also be sure to pass the For example if you're using the CLI, you'll execute: ```sh -$ perf record -k mono wasmtime --jitdump foo.wasm +$ perf record -k mono wasmtime --profile=jitdump foo.wasm ``` This will create a `perf.data` file as per usual, but it will *also* create a @@ -110,7 +163,7 @@ To collect perf information for this wasm module we'll execute: ```sh $ rustc --target wasm32-wasi fib.rs -O -$ perf record -k mono wasmtime --jitdump fib.wasm +$ perf record -k mono wasmtime --profile=jitdump fib.wasm fib(42) = 267914296 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.147 MB perf.data (3435 samples) ] diff --git a/docs/examples-profiling-vtune.md b/docs/examples-profiling-vtune.md index 9e15d57f8b..61c67d1b57 100644 --- a/docs/examples-profiling-vtune.md +++ b/docs/examples-profiling-vtune.md @@ -39,7 +39,7 @@ runtime--enable runtime support based on how you use Wasmtime: * **C API** - call the `wasmtime_config_profiler_set` API with a `WASMTIME_PROFILING_STRATEGY_VTUNE` value. -* **Command Line** - pass the `--vtune` flag on the command line. +* **Command Line** - pass the `--profile=vtune` flag on the command line. 
### Profiling Wasmtime itself @@ -58,11 +58,11 @@ With VTune [properly installed][download], if you are using the CLI execute: ```sh $ cargo build -$ vtune -run-pass-thru=--no-altstack -collect hotspots target/debug/wasmtime --vtune foo.wasm +$ vtune -run-pass-thru=--no-altstack -collect hotspots target/debug/wasmtime --profile=vtune foo.wasm ``` This command tells the VTune collector (`vtune`) to collect hot spot -profiling data as Wasmtime is executing `foo.wasm`. The `--vtune` flag enables +profiling data as Wasmtime is executing `foo.wasm`. The `--profile=vtune` flag enables VTune support in Wasmtime so that the collector is also alerted to JIT events that take place during runtime. The first time this is run, the result of the command is a results diretory `r000hs/` which contains profiling data for @@ -96,13 +96,13 @@ $ rustc --target wasm32-wasi fib.rs -C opt-level=z -C lto=yes ``` Then we execute the Wasmtime runtime (built with the `vtune` feature and -executed with the `--vtune` flag to enable reporting) inside the VTune CLI +executed with the `--profile=vtune` flag to enable reporting) inside the VTune CLI application, `vtune`, which must already be installed and available on the path. To collect hot spot profiling information, we execute: ```sh $ rustc --target wasm32-wasi fib.rs -C opt-level=z -C lto=yes -$ vtune -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --vtune fib.wasm +$ vtune -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --profile=vtune fib.wasm fib(45) = 1134903170 amplxe: Collection stopped. 
amplxe: Using result path /home/jlb6740/wasmtime/r000hs @@ -141,7 +141,7 @@ like: - Open VTune Profiler - "Configure Analysis" with - "Application" set to `/path/to/wasmtime` (e.g., `target/debug/wasmtime`) - - "Application parameters" set to `--vtune /path/to/module.wasm` + - "Application parameters" set to `--profile=vtune /path/to/module.wasm` - "Working directory" set as appropriate - Enable "Hardware Event-Based Sampling," which may require some system configuration, e.g. `sysctl -w kernel.perf_event_paranoid=0`