Adds JIT profiling support for VTune (#819)

This patch adds initial support for ittapi, which is an open source profiling API for instrumentation, tracing, and profiling of jitted code. Result files can be read by VTune for analysis. Build: cargo build --features=vtune Profile: // Using amplxe-cl from VTune amplxe-cl -v -collect hotspots target/debug/wasmtime --vtune test.wasm
2020-04-02 07:04:08 -07:00
parent 9e11e8d019
commit dff789c7c6
17 changed files with 463 additions and 154 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1019,6 +1019,15 @@ version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"

+[[package]]
+name = "ittapi-rs"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa16daf7106319e5c4456733e33aeb64d8c986af0127bc25eb6d9e84e2f1f8b0"
+dependencies = [
+ "cmake",
+]
+
 [[package]]
 name = "jobserver"
 version = "0.1.21"
@@ -2319,6 +2328,7 @@ dependencies = [
 "anyhow",
 "cfg-if",
 "gimli",
+ "ittapi-rs",
 "lazy_static",
 "libc",
 "object",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -75,6 +75,7 @@ lightbeam = [
    "wasmtime/lightbeam",
 ]
 jitdump = ["wasmtime/jitdump"]
+vtune = ["wasmtime/vtune"]

 [badges]
 maintenance = { status = "actively-developed" }
--- a/crates/api/Cargo.toml
+++ b/crates/api/Cargo.toml
@@ -51,6 +51,9 @@ lightbeam = ["wasmtime-jit/lightbeam"]
 # Enables support for the `perf` jitdump profiler
 jitdump = ["wasmtime-jit/jitdump"]

+# Enables support for the `VTune` profiler
+vtune = ["wasmtime-jit/vtune"]
+
 [[test]]
 name = "host-segfault"
 harness = false
--- a/crates/api/src/runtime.rs
+++ b/crates/api/src/runtime.rs
@@ -8,7 +8,7 @@ use wasmparser::{OperatorValidatorConfig, ValidatingParserConfig};
 use wasmtime_environ::settings::{self, Configurable};
 use wasmtime_environ::CacheConfig;
 use wasmtime_jit::{native, CompilationStrategy, Compiler};
-use wasmtime_profiling::{JitDumpAgent, NullProfilerAgent, ProfilingAgent};
+use wasmtime_profiling::{JitDumpAgent, NullProfilerAgent, ProfilingAgent, VTuneAgent};

 // Runtime Environment

@@ -227,6 +227,7 @@ impl Config {
    pub fn profiler(&mut self, profile: ProfilingStrategy) -> Result<&mut Self> {
        self.profiler = match profile {
            ProfilingStrategy::JitDump => Arc::new(JitDumpAgent::new()?) as Arc<dyn ProfilingAgent>,
+            ProfilingStrategy::VTune => Arc::new(VTuneAgent::new()?) as Arc<dyn ProfilingAgent>,
            ProfilingStrategy::None => Arc::new(NullProfilerAgent),
        };
        Ok(self)
@@ -404,6 +405,9 @@ pub enum ProfilingStrategy {
    /// Collect profiling info for "jitdump" file format, used with `perf` on
    /// Linux.
    JitDump,
+
+    /// Collect profiling info using the "ittapi", used with `VTune` on Linux.
+    VTune,
 }

 // Engine
--- a/crates/c-api/include/wasmtime.h
+++ b/crates/c-api/include/wasmtime.h
@@ -30,6 +30,7 @@ typedef uint8_t wasmtime_profiling_strategy_t;
 enum wasmtime_profiling_strategy_t { // ProfilingStrategy
  WASMTIME_PROFILING_STRATEGY_NONE,
  WASMTIME_PROFILING_STRATEGY_JITDUMP,
+  WASMTIME_PROFILING_STRATEGY_VTUNE,
 };

 #define WASMTIME_CONFIG_PROP(ret, name, ty) \
--- a/crates/environ/src/cache/config.rs
+++ b/crates/environ/src/cache/config.rs
@@ -283,9 +283,7 @@ macro_rules! generate_setting_getter {
        ///
        /// Panics if the cache is disabled.
        pub fn $setting(&self) -> $setting_type {
-            self
-                .$setting
-                .expect(CACHE_IMPROPER_CONFIG_ERROR_MSG)
+            self.$setting.expect(CACHE_IMPROPER_CONFIG_ERROR_MSG)
        }
    };
 }
--- a/crates/jit/Cargo.toml
+++ b/crates/jit/Cargo.toml
@@ -36,6 +36,7 @@ winapi = { version = "0.3.8", features = ["winnt", "impl-default"] }
 [features]
 lightbeam = ["wasmtime-environ/lightbeam"]
 jitdump = ["wasmtime-profiling/jitdump"]
+vtune = ["wasmtime-profiling/vtune"]

 [badges]
 maintenance = { status = "actively-developed" }
--- a/crates/profiling/Cargo.toml
+++ b/crates/profiling/Cargo.toml
@@ -22,9 +22,11 @@ serde = { version = "1.0.99", features = ["derive"] }
 target-lexicon = "0.10.0"
 wasmtime-environ = { path = "../environ", version = "0.14.0" }
 wasmtime-runtime = { path = "../runtime", version = "0.14.0" }
+ittapi-rs = { version = "0.1.5", optional = true }

 [badges]
 maintenance = { status = "actively-developed" }

 [features]
 jitdump = ['object', 'scroll', 'gimli']
+vtune = ['ittapi-rs']
--- a/crates/profiling/src/lib.rs
+++ b/crates/profiling/src/lib.rs
@@ -15,7 +15,18 @@ cfg_if::cfg_if! {
    }
 }

+cfg_if::cfg_if! {
+    if #[cfg(all(feature = "vtune", target_os = "linux"))] {
+        #[path = "vtune_linux.rs"]
+        mod vtune;
+    } else {
+        #[path = "vtune_disabled.rs"]
+        mod vtune;
+    }
+}
+
 pub use crate::jitdump::JitDumpAgent;
+pub use crate::vtune::VTuneAgent;

 /// Common interface for profiling tools.
 pub trait ProfilingAgent: Send + Sync + 'static {
--- a/crates/profiling/src/vtune_disabled.rs
+++ b/crates/profiling/src/vtune_disabled.rs
@@ -0,0 +1,33 @@
+use crate::ProfilingAgent;
+use anyhow::{bail, Result};
+use wasmtime_environ::entity::PrimaryMap;
+use wasmtime_environ::wasm::DefinedFuncIndex;
+use wasmtime_environ::Module;
+use wasmtime_runtime::VMFunctionBody;
+
+/// Interface for driving vtune support
+#[derive(Debug)]
+pub struct VTuneAgent {
+    _private: (),
+}
+
+impl VTuneAgent {
+    /// Initialize a VTuneAgent; always fails because VTune support is unavailable in this build
+    pub fn new() -> Result<Self> {
+        if cfg!(feature = "vtune") {
+            bail!("VTune is not supported on this platform.");
+        } else {
+            bail!("VTune support disabled at compile time.");
+        }
+    }
+}
+
+impl ProfilingAgent for VTuneAgent {
+    fn module_load(
+        &self,
+        _module: &Module,
+        _functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
+        _dbg_image: Option<&[u8]>,
+    ) {
+    }
+}
--- a/crates/profiling/src/vtune_linux.rs
+++ b/crates/profiling/src/vtune_linux.rs
@@ -0,0 +1,156 @@
+//! Adds support for profiling jitted code using VTune Amplifier
+//!
+//! Build:
+//!     cargo build --features=vtune
+//! Profile:
+//!     amplxe-cl -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --vtune test.wasm
+//!
+//! Note: amplxe-cl is the command-line tool for VTune, which must be installed.
+
+use crate::ProfilingAgent;
+use anyhow::Result;
+use core::ptr;
+use ittapi_rs::*;
+use std::collections::HashMap;
+use std::ffi::CString;
+use std::sync::Mutex;
+use wasmtime_environ::entity::PrimaryMap;
+use wasmtime_environ::wasm::DefinedFuncIndex;
+use wasmtime_environ::Module;
+use wasmtime_runtime::VMFunctionBody;
+
+/// Interface for driving the ittapi for VTune support
+pub struct VTuneAgent {
+    // Note that we use a mutex internally to serialize state updates
+    // since multiple threads may be sharing this agent.
+    state: Mutex<State>,
+}
+
+/// Interface for driving vtune
+#[derive(Clone, Debug, Default)]
+struct State {
+    /// Unique identifier for the jitted function
+    method_id: HashMap<(usize, DefinedFuncIndex), u32>,
+}
+
+impl VTuneAgent {
+    /// Initialize a VTuneAgent with an empty method ID table
+    pub fn new() -> Result<Self> {
+        let state = State {
+            method_id: HashMap::new(),
+        };
+        Ok(VTuneAgent {
+            state: Mutex::new(state),
+        })
+    }
+}
+
+impl Drop for VTuneAgent {
+    fn drop(&mut self) {
+        self.state.lock().unwrap().event_shutdown();
+    }
+}
+
+impl State {
+    /// Return the unique method ID for use with the ittapi
+    pub fn get_method_id(&mut self, module_id: usize, func_idx: DefinedFuncIndex) -> u32 {
+        let method_id: u32;
+        unsafe {
+            method_id = iJIT_GetNewMethodID();
+        }
+        assert_eq!(
+            self.method_id.insert((module_id, func_idx), method_id),
+            None
+        );
+        method_id
+    }
+
+    /// Load module
+    pub fn event_load(
+        &mut self,
+        method_id: u32,
+        filename: &str,
+        module_name: &str,
+        method_name: &str,
+        addr: *const u8,
+        len: usize,
+    ) -> () {
+        let mut jmethod = _iJIT_Method_Load {
+            method_id: method_id,
+            method_name: CString::new(method_name)
+                .expect("CString::new failed")
+                .into_raw(),
+            method_load_address: addr as *mut ::std::os::raw::c_void,
+            method_size: len as u32,
+            line_number_size: 0,
+            line_number_table: ptr::null_mut(),
+            class_id: 0,
+            class_file_name: CString::new(module_name)
+                .expect("CString::new failed")
+                .into_raw(),
+            source_file_name: CString::new(filename)
+                .expect("CString::new failed")
+                .into_raw(),
+        };
+        let jmethod_ptr = &mut jmethod as *mut _ as *mut _;
+        unsafe {
+            println!("EventLoad: NotifyEvent Called {}", method_id);
+            let _ret = iJIT_NotifyEvent(
+                iJIT_jvm_event_iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+                jmethod_ptr as *mut ::std::os::raw::c_void,
+            );
+        }
+    }
+
+    /// Shutdown module
+    fn event_shutdown(&mut self) -> () {
+        unsafe {
+            println!("Drop was called!!!!!!\n");
+            let _ret = iJIT_NotifyEvent(iJIT_jvm_event_iJVM_EVENT_TYPE_SHUTDOWN, ptr::null_mut());
+        }
+    }
+}
+
+impl ProfilingAgent for VTuneAgent {
+    fn module_load(
+        &self,
+        module: &Module,
+        functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
+        dbg_image: Option<&[u8]>,
+    ) {
+        self.state
+            .lock()
+            .unwrap()
+            .module_load(module, functions, dbg_image);
+    }
+}
+
+impl State {
+    fn module_load(
+        &mut self,
+        module: &Module,
+        functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
+        _dbg_image: Option<&[u8]>,
+    ) -> () {
+        for (idx, func) in functions.iter() {
+            let (addr, len) = unsafe { ((**func).as_ptr() as *const u8, (**func).len()) };
+            let default_filename = "wasm_file";
+            let default_module_name = String::from("wasm_module");
+            let module_name = module.name.as_ref().unwrap_or(&default_module_name);
+            let method_name = super::debug_name(module, idx);
+            let method_id = self.get_method_id(module.id, idx);
+            println!(
+                "Event Load: ({}) {:?}::{:?} Addr:{:?}\n",
+                method_id, module_name, method_name, addr
+            );
+            self.event_load(
+                method_id,
+                default_filename,
+                module_name,
+                &method_name,
+                addr,
+                len,
+            );
+        }
+    }
+}
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -7,6 +7,8 @@
 - [Examples](./examples.md)
  - [Markdown parser](./examples-markdown.md)
  - [Profiling WebAssembly](./examples-profiling.md)
+    - [Profiling with Perf](./examples-profiling-perf.md)
+    - [Profiling with VTune](./examples-profiling-vtune.md)
  - [Embedding in Rust](./examples-rust-embed.md)
    - [Hello, world!](./examples-rust-hello-world.md)
    - [Calculating the GCD](./examples-rust-gcd.md)
--- a/docs/assets/vtune-gui-fib.png
+++ b/docs/assets/vtune-gui-fib.png
--- a/docs/examples-profiling-perf.md
+++ b/docs/examples-profiling-perf.md
@@ -0,0 +1,140 @@
+# Using `perf` on Linux
+
+One profiler supported by Wasmtime is the [`perf`
+profiler](https://perf.wiki.kernel.org/index.php/Main_Page) for Linux. This is
+an extremely powerful profiler with lots of documentation on the web, but for
+the rest of this section we'll assume you're running on Linux and already have
+`perf` installed.
+
+Profiling support with `perf` uses the "jitdump" support in the `perf` CLI. This
+requires runtime support from Wasmtime itself, so you will need to manually
+change a few things to enable profiling support in your application. First
+you'll want to make sure that Wasmtime is compiled with the `jitdump` Cargo
+feature (which is enabled by default). Otherwise enabling runtime support
+depends on how you're using Wasmtime:
+
+* **Rust API** - you'll want to call the [`Config::profiler`] method with
+  `ProfilingStrategy::JitDump` to enable profiling of your wasm modules.
+
+* **C API** - you'll want to call the `wasmtime_config_profiler_set` API with a
+  `WASMTIME_PROFILING_STRATEGY_JITDUMP` value.
+
+* **Command Line** - you'll want to pass the `--jitdump` flag on the command
+  line.
+
+Once jitdump support is enabled, you'll use `perf record` like usual to record
+your application's performance. You'll need to also be sure to pass the
+`--clockid mono` or `-k mono` flag to `perf record`.
+
+For example if you're using the CLI, you'll execute:
+
+```sh
+$ perf record -k mono wasmtime --jitdump foo.wasm
+```
+
+This will create a `perf.data` file as per usual, but it will *also* create a
+`jit-XXXX.dump` file. This extra `*.dump` file is the jitdump file which is
+specified by `perf` and Wasmtime generates at runtime.
+
+The next thing you need to do is to merge the `*.dump` file into the
+`perf.data` file, which you can do with the `perf inject` command:
+
+```sh
+$ perf inject --jit --input perf.data --output perf.jit.data
+```
+
+This will read `perf.data`, automatically pick up the `*.dump` file that's
+correct, and then create `perf.jit.data` which merges all the JIT information
+together. This should also create a lot of `jitted-XXXX-N.so` files in the
+current directory which are ELF images for all the JIT functions that were
+created by Wasmtime.
+
+After that you can explore the `perf.jit.data` profile as you usually would,
+for example with:
+
+```sh
+$ perf report --input perf.jit.data
+```
+
+You should be able to annotate wasm functions and see their raw assembly. You
+should also see entries for wasm functions show up as one function and the
+name of each function matches the debug name section in the wasm file.
+
+Note that support for jitdump is still relatively new in Wasmtime, so if you
+have any problems, please don't hesitate to [file an issue]!
+
+[file an issue]: https://github.com/bytecodealliance/wasmtime/issues/new
+
+### `perf` and DWARF information
+
+If the jitdump profile doesn't give you enough information by default, you can
+also enable dwarf debug information to be generated for JIT code which should
+give the `perf` profiler more information about what's being profiled. This can
+include information like more descriptive function names, filenames, and line
+numbers.
+
+Enabling dwarf debug information for JIT code depends on how you're using
+Wasmtime:
+
+* **Rust API** - you'll want to call the [`Config::debug_info`] method.
+
+* **C API** - you'll want to call the `wasmtime_config_debug_info_set` API.
+
+* **Command Line** - you'll want to pass the `-g` flag on the command line.
+
+You shouldn't need to do anything else to get this information into `perf`. The
+perf collection data should automatically pick up all this dwarf debug
+information.
+
+### `perf` example
+
+Let's run through a quick example with `perf` to get the feel for things. First
+let's take a look at some wasm:
+
+```rust
+fn main() {
+    let n = 42;
+    println!("fib({}) = {}", n, fib(n));
+}
+
+fn fib(n: u32) -> u32 {
+    if n <= 2 {
+        1
+    } else {
+        fib(n - 1) + fib(n - 2)
+    }
+}
+```
+
+To collect perf information for this wasm module we'll execute:
+
+```sh
+$ rustc --target wasm32-wasi fib.rs -O
+$ perf record -k mono wasmtime --jitdump fib.wasm
+fib(42) = 267914296
+[ perf record: Woken up 1 times to write data ]
+[ perf record: Captured and wrote 0.147 MB perf.data (3435 samples) ]
+$ perf inject --jit --input perf.data --output perf.jit.data
+```
+
+And we should have all our information now! We can execute `perf report` for
+example to see that 99% of our runtime (as expected) is spent in our `fib`
+function. Note that the symbol has been demangled to `fib::fib` which is what
+the Rust symbol is:
+
+```sh
+$ perf report --input perf.jit.data
+```
+
+![perf report output](assets/perf-report-fib.png)
+
+Alternatively we could also use `perf annotate` to take a look at the
+disassembly of the `fib` function, seeing what the JIT generated:
+
+```sh
+$ perf annotate --input perf.jit.data
+```
+
+![perf annotate output](assets/perf-annotate-fib.png)
+
+[`Config::debug_info`]: https://bytecodealliance.github.io/wasmtime/api/wasmtime/struct.Config.html#method.debug_info
--- a/docs/examples-profiling-vtune.md
+++ b/docs/examples-profiling-vtune.md
@@ -0,0 +1,81 @@
+# Using `VTune` on Linux
+
+[`VTune Profiler`](https://software.intel.com/en-us/vtune-help) is a popular performance profiling tool that targets both 32-bit and 64-bit x86 architectures. The tool collects profiling data during runtime and then, either through the command line or the GUI, provides a variety of options for viewing and analyzing that data. VTune Profiler is available in both commercial and free options. The free download version, backed by a community forum for support, is available [`here`](https://software.intel.com/en-us/vtune/choose-download#standalone). This version is appropriate for detailed analysis of your WASM program. Note that for JIT support, Wasmtime only supports VTune profiling on Linux platforms, but other platforms are expected to be enabled in the future.
+
+VTune support in wasmtime is provided through the jit profiling APIs at [`https://github.com/intel/ittapi`](https://github.com/intel/ittapi). These APIs are provided for code generators (or the runtimes that use them) to report jit activities. These APIs are implemented in a shared library (built from the same [`ittapi`](https://github.com/intel/ittapi) project) which wasmtime pulls in and links to when vtune support is specified through the `vtune` cargo feature flag. This feature is not enabled by default. When the VTune collector is run, it links to this same shared library to handle profiling requests related to the reported jit activities. Specifically, Wasmtime pulls in the ittapi-rs system crate which provides the shared library and Rust interface to the jit profiling APIs.
+
+For jit profiling with VTune Profiler, first you want to make sure the `vtune` feature is enabled. After that, enabling runtime support is based on how you are using Wasmtime:
+
+* **Rust API** - you'll want to call the [`Config::profiler`] method with
+  `ProfilingStrategy::VTune` to enable profiling of your wasm modules.
+
+* **C API** - you'll want to call the `wasmtime_config_profiler_set` API with a
+  `WASMTIME_PROFILING_STRATEGY_VTUNE` value.
+
+* **Command Line** - you'll want to pass the `--vtune` flag on the command
+  line.
+
+After profiling is complete, a results folder will hold profiling data that can then be read and analyzed with VTune.
+
+Also note, VTune is capable of profiling a single process or system wide. As such, and like perf, VTune is plenty capable of profiling the wasmtime runtime itself without any added support. However, APIs [`here`](https://github.com/intel/ittapi) also support an interface for marking the start and stop of code regions for easy isolation in the VTune Profiler. Support for these APIs is expected to be added in the future.
+
+Take the following example: with VTune properly installed, if you're using the CLI you'll execute with:
+
+```sh
+$ cargo build --features=vtune
+$ amplxe-cl -run-pass-thru=--no-altstack -collect hotspots target/debug/wasmtime --vtune foo.wasm
+```
+
+This command tells the VTune collector (amplxe-cl) to collect hotspot profiling data on wasmtime that is executing foo.wasm. The --vtune flag enables VTune support in wasmtime so that the collector is also alerted to jit events that take place during runtime. The first time this is run, the result of the command is a results directory r000hs/ which contains hotspot profiling data for wasmtime and the execution of foo.wasm. This data can then be read and displayed via the command line or via the VTune GUI by importing the result.
+
+### `VTune` example
+
+Running through a familiar algorithm, first we'll start with the following wasm:
+
+```rust
+fn main() {
+    let n = 45;
+    println!("fib({}) = {}", n, fib(n));
+}
+
+fn fib(n: u32) -> u32 {
+    if n <= 2 {
+        1
+    } else {
+        fib(n - 1) + fib(n - 2)
+    }
+}
+```
+
+Profiling data using VTune can be collected in a number of ways, and collection can be focused
+on certain types of analysis. Below we show a command line executable option using amplxe-cl, which is
+installed and in our path, to help find hotspots in our wasm module. To collect profiling information then,
+we'll simply execute:
+
+```sh
+$ rustc --target wasm32-wasi fib.rs -C opt-level=z -C lto=yes
+$ amplxe-cl -run-pass-thru=--no-altstack -v -collect hotspots target/debug/wasmtime --vtune fib.wasm
+fib(45) = 1134903170
+amplxe: Collection stopped.
+amplxe: Using result path /home/jlb6740/wasmtime/r000hs
+amplxe: Executing actions  7 % Clearing the database
+amplxe: The database has been cleared, elapsed time is 0.239 seconds.
+amplxe: Executing actions 14 % Updating precomputed scalar metrics
+amplxe: Raw data has been loaded to the database, elapsed time is 0.792 seconds.
+amplxe: Executing actions 19 % Processing profile metrics and debug information
+...
+...
+Top Hotspots
+Function                                                                                      Module          CPU Time
+--------------------------------------------------------------------------------------------  --------------  --------
+h2bacf53cb3845acf                                                                             [Dynamic code]    3.480s
+__memmove_avx_unaligned_erms                                                                  libc.so.6         0.222s
+cranelift_codegen::ir::instructions::InstructionData::opcode::hee6f5b6a72fc684e               wasmtime          0.122s
+core::ptr::slice_from_raw_parts::hc5cb6f1b39a0e7a1                                            wasmtime          0.066s
+_$LT$usize$u20$as$u20$core..slice..SliceIndex$LT$$u5b$T$u5d$$GT$$GT$::get::h70c7f142eeeee8bd  wasmtime          0.066s
+```
+Note again, wasmtime must be built with the `vtune` feature flag enabled. From here there are several options for further analysis. Below is an example view of the collected data as seen in VTune's GUI with its many options.
+
+![vtune report output](assets/vtune-gui-fib.png)
+
+For more information on VTune and the analysis tools it provides see the docs [`here`](https://software.intel.com/en-us/vtune-help).
--- a/docs/examples-profiling.md
+++ b/docs/examples-profiling.md
@@ -6,147 +6,4 @@ well your wasm module is performing! From time to time you might want to dive a
 bit deeper into the performance of your wasm, and this is where profiling comes
 into the picture.

-Profiling support in Wasmtime is still under development, but if you're using a
-supported profiler this example is targeted at helping you get some more
-information about the performance of your wasm.
-
-## Using `perf` on Linux
-
-One profiler supported by Wasmtime is the [`perf`
-profiler](https://perf.wiki.kernel.org/index.php/Main_Page) for Linux. This is
-an extremely powerful profiler with lots of documentation on the web, but for
-the rest of this section we'll assume you're running on Linux and already have
-`perf` installed.
-
-Profiling support with `perf` uses the "jitdump" support in the `perf` CLI. This
-requires runtime support from Wasmtime itself, so you will need to manually
-change a few things to enable profiling support in your application. First
-you'll want to make sure that Wasmtime is compiled with the `jitdump` Cargo
-feature (which is enabled by default). Otherwise enabling runtime support
-depends on how you're using Wasmtime:
-
- **Rust API** - you'll want to call the [`Config::profiler`] method with
-  `ProfilingStrategy::JitDump` to enable profiling of your wasm modules.
-
- **C API** - you'll want to call the `wasmtime_config_profiler_set` API with a
-  `WASMTIME_PROFILING_STRATEGY_JITDUMP` value.
-
- **Command Line** - you'll want to pass the `--jitdump` flag on the command
-  line.
-
-Once jitdump support is enabled, you'll use `perf record` like usual to record
-your application's performance. You'll need to also be sure to pass the
-`--clockid mono` or `-k mono` flag to `perf record`.
-
-For example if you're using the CLI, you'll execute:
-
-```sh
-perf record -k mono wasmtime --jitdump foo.wasm
-```
-
-This will create a `perf.data` file as per usual, but it will _also_ create a
-`jit-XXXX.dump` file. This extra `*.dump` file is the jitdump file which is
-specified by `perf` and Wasmtime generates at runtime.
-
-The next thing you need to do is to merge the `*.dump` file into the
-`perf.data` file, which you can do with the `perf inject` command:
-
-```sh
-perf inject --jit --input perf.data --output perf.jit.data
-```
-
-This will read `perf.data`, automatically pick up the `*.dump` file that's
-correct, and then create `perf.jit.data` which merges all the JIT information
-together. This should also create a lot of `jitted-XXXX-N.so` files in the
-current directory which are ELF images for all the JIT functions that were
-created by Wasmtime.
-
-After that you can explore the `perf.jit.data` profile as you usually would,
-for example with:
-
-```sh
-perf report --input perf.jit.data
-```
-
-You should be able to annotate wasm functions and see their raw assembly. You
-should also see entries for wasm functions show up as one function and the
-name of each function matches the debug name section in the wasm file.
-
-Note that support for jitdump is still relatively new in Wasmtime, so if you
-have any problems, please don't hesitate to [file an issue]!
-
-[file an issue]: https://github.com/bytecodealliance/wasmtime/issues/new
-
-### `perf` and DWARF information
-
-If the jitdump profile doesn't give you enough information by default, you can
-also enable dwarf debug information to be generated for JIT code which should
-give the `perf` profiler more information about what's being profiled. This can
-include information like more desriptive function names, filenames, and line
-numbers.
-
-Enabling dwarf debug information for JIT code depends on how you're using
-Wasmtime:
-
- **Rust API** - you'll want to call the [`Config::debug_info`] method.
-
- **C API** - you'll want to call the `wasmtime_config_debug_info_set` API.
-
- **Command Line** - you'll want to pass the `-g` flag on the command line.
-
-You shouldn't need to do anything else to get this information into `perf`. The
-perf collection data should automatically pick up all this dwarf debug
-information.
-
-### `perf` example
-
-Let's run through a quick example with `perf` to get the feel for things. First
-let's take a look at some wasm:
-
-```rust
-fn main() {
-    let n = 42;
-    println!("fib({}) = {}", n, fib(n));
-}
-
-fn fib(n: u32) -> u32 {
-    if n <= 2 {
-        1
-    } else {
-        fib(n - 1) + fib(n - 2)
-    }
-}
-```
-
-To collect perf information for this wasm module we'll execute:
-
-```sh
-$ rustc --target wasm32-wasi fib.rs -O
-$ perf record -k mono wasmtime --jitdump fib.wasm
-fib(42) = 267914296
-[ perf record: Woken up 1 times to write data ]
-[ perf record: Captured and wrote 0.147 MB perf.data (3435 samples) ]
-$ perf inject --jit --input perf.data --output perf.jit.data
-```
-
-And we should have all out information now! We can execute `perf report` for
-example to see that 99% of our runtime (as expected) is spent in our `fib`
-function. Note that the symbol has been demangled to `fib::fib` which is what
-the Rust symbol is:
-
-```sh
-perf report --input perf.jit.data
-```
-
-![perf report output](assets/perf-report-fib.png)
-
-Alternatively we could also use `perf annotate` to take a look at the
-disassembly of the `fib` function, seeing what the JIT generated:
-
-```sh
-perf annotate --input perf.jit.data
-```
-
-![perf annotate output](assets/perf-annotate-fib.png)
-
-[`config::debug_info`]: https://bytecodealliance.github.io/wasmtime/api/wasmtime/struct.Config.html#method.debug_info
+Profiling support in Wasmtime is still under development, but if you're using either [perf](./examples-profiling-perf.md) or [VTune](./examples-profiling-vtune.md) the examples in these sections are targeted at helping you get some information about the performance of your wasm modules.
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -43,10 +43,15 @@ fn pick_compilation_strategy(cranelift: bool, lightbeam: bool) -> Result<Strateg
    })
 }

-fn pick_profiling_strategy(jitdump: bool) -> Result<ProfilingStrategy> {
-    Ok(match jitdump {
-        true => ProfilingStrategy::JitDump,
-        false => ProfilingStrategy::None,
+fn pick_profiling_strategy(jitdump: bool, vtune: bool) -> Result<ProfilingStrategy> {
+    Ok(match (jitdump, vtune) {
+        (true, false) => ProfilingStrategy::JitDump,
+        (false, true) => ProfilingStrategy::VTune,
+        (true, true) => {
+            println!("Can't enable --jitdump and --vtune at the same time. Profiling not enabled.");
+            ProfilingStrategy::None
+        }
+        _ => ProfilingStrategy::None,
    })
 }

@@ -128,9 +133,13 @@ struct CommonOptions {
    lightbeam: bool,

    /// Generate jitdump file (supported on --features=profiling build)
-    #[structopt(long)]
+    #[structopt(long, conflicts_with = "vtune")]
    jitdump: bool,

+    /// Generate vtune (supported on --features=vtune build)
+    #[structopt(long, conflicts_with = "jitdump")]
+    vtune: bool,
+
    /// Run optimization passes on translated functions, on by default
    #[structopt(short = "O", long)]
    optimize: bool,
@@ -158,7 +167,7 @@ impl CommonOptions {
            .wasm_threads(self.enable_threads || self.enable_all)
            .cranelift_opt_level(self.opt_level())
            .strategy(pick_compilation_strategy(self.cranelift, self.lightbeam)?)?
-            .profiler(pick_profiling_strategy(self.jitdump)?)?;
+            .profiler(pick_profiling_strategy(self.jitdump, self.vtune)?)?;
        if !self.disable_cache {
            match &self.config {
                Some(path) => {