Support plugging external profilers into the Cranelift timing infrastructure (#5749)

* Remove no-std code for cranelift_codegen::timings

no-std mode isn't supported by Cranelift anymore

* Simplify define_passes macro

* Add egraph opt timings

* Replace the add_to_current api with PassTimes::add

* Omit a couple of unused time measurements

* Reduce divergence between run and run_passes a bit

* Introduce a Profiler trait

This allows plugging in external profilers into the Cranelift profiling
framework.

* Add Pass::description method

* Remove duplicate usage of the compile pass timing

* Rustfmt
This commit is contained in:
bjorn3
2023-03-10 20:33:56 +01:00
committed by GitHub
parent 0751cba6e2
commit 108f7917c8
5 changed files with 181 additions and 181 deletions

View File

@@ -213,7 +213,6 @@ impl Context {
/// ///
/// Returns information about the function's code and read-only data. /// Returns information about the function's code and read-only data.
pub fn compile(&mut self, isa: &dyn TargetIsa) -> CompileResult<&CompiledCode> { pub fn compile(&mut self, isa: &dyn TargetIsa) -> CompileResult<&CompiledCode> {
let _tt = timing::compile();
let stencil = self.compile_stencil(isa).map_err(|error| CompileError { let stencil = self.compile_stencil(isa).map_err(|error| CompileError {
inner: error, inner: error,
func: &self.func, func: &self.func,
@@ -379,6 +378,8 @@ impl Context {
/// Run optimizations via the egraph infrastructure. /// Run optimizations via the egraph infrastructure.
pub fn egraph_pass(&mut self) -> CodegenResult<()> { pub fn egraph_pass(&mut self) -> CodegenResult<()> {
let _tt = timing::egraph();
trace!( trace!(
"About to optimize with egraph phase:\n{}", "About to optimize with egraph phase:\n{}",
self.func.display() self.func.display()

View File

@@ -3,8 +3,11 @@
//! This modules provides facilities for timing the execution of individual compilation passes. //! This modules provides facilities for timing the execution of individual compilation passes.
use core::fmt; use core::fmt;
use std::any::Any;
pub use self::details::{add_to_current, take_current, PassTimes, TimingToken}; use std::boxed::Box;
use std::cell::{Cell, RefCell};
use std::mem;
use std::time::{Duration, Instant};
// Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a // Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a
// snake_case name and a plain text description used when printing out the timing report. // snake_case name and a plain text description used when printing out the timing report.
@@ -16,22 +19,26 @@ pub use self::details::{add_to_current, take_current, PassTimes, TimingToken};
// - A const array of pass descriptions. // - A const array of pass descriptions.
// - A public function per pass used to start the timing of that pass. // - A public function per pass used to start the timing of that pass.
macro_rules! define_passes { macro_rules! define_passes {
{ $enum:ident, $num_passes:ident, $descriptions:ident; ($($pass:ident: $desc:expr,)+) => {
$($pass:ident: $desc:expr,)+ /// A single profiled pass.
} => {
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum $enum { $($pass,)+ None} pub enum Pass {
$(#[doc=$desc] $pass,)+
/// No active pass.
None,
}
const $num_passes: usize = $enum::None as usize; /// The amount of profiled passes.
pub const NUM_PASSES: usize = Pass::None as usize;
const $descriptions: [&str; $num_passes] = [ $($desc),+ ]; const DESCRIPTIONS: [&str; NUM_PASSES] = [ $($desc),+ ];
$( $(
#[doc=$desc] #[doc=$desc]
#[must_use] #[must_use]
pub fn $pass() -> TimingToken { pub fn $pass() -> Box<dyn Any> {
details::start_pass($enum::$pass) start_pass(Pass::$pass)
} }
)+ )+
} }
@@ -39,8 +46,6 @@ macro_rules! define_passes {
// Pass definitions. // Pass definitions.
define_passes! { define_passes! {
Pass, NUM_PASSES, DESCRIPTIONS;
// All these are used in other crates but defined here so they appear in the unified // All these are used in other crates but defined here so they appear in the unified
// `PassTimes` output. // `PassTimes` output.
process_file: "Processing test file", process_file: "Processing test file",
@@ -58,6 +63,7 @@ define_passes! {
loop_analysis: "Loop analysis", loop_analysis: "Loop analysis",
preopt: "Pre-legalization rewriting", preopt: "Pre-legalization rewriting",
dce: "Dead code elimination", dce: "Dead code elimination",
egraph: "Egraph based optimizations",
gvn: "Global value numbering", gvn: "Global value numbering",
licm: "Loop invariant code motion", licm: "Loop invariant code motion",
unreachable_code: "Remove unreachable blocks", unreachable_code: "Remove unreachable blocks",
@@ -75,39 +81,61 @@ define_passes! {
} }
impl Pass { impl Pass {
pub fn idx(self) -> usize { fn idx(self) -> usize {
self as usize self as usize
} }
/// Description of the pass.
pub fn description(self) -> &'static str {
match DESCRIPTIONS.get(self.idx()) {
Some(s) => s,
None => "<no pass>",
}
}
} }
impl fmt::Display for Pass { impl fmt::Display for Pass {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match DESCRIPTIONS.get(self.idx()) { f.write_str(self.description())
Some(s) => f.write_str(s),
None => f.write_str("<no pass>"),
}
} }
} }
/// Implementation details. /// A profiler.
pub trait Profiler {
/// Start a profiling pass.
/// ///
/// This whole module can be gated on a `cfg` feature to provide a dummy implementation for /// Will return a token which when dropped indicates the end of the pass.
/// performance-sensitive builds or restricted environments. The dummy implementation must provide ///
/// `TimingToken` and `PassTimes` types and `take_current`, `add_to_current`, and `start_pass` funcs /// Multiple passes can be active at the same time, but they must be started and stopped in a
#[cfg(feature = "std")] /// LIFO fashion.
mod details { fn start_pass(&self, pass: Pass) -> Box<dyn Any>;
use super::{Pass, DESCRIPTIONS, NUM_PASSES}; }
use std::cell::{Cell, RefCell};
use std::fmt; // Information about passes in a single thread.
use std::mem; thread_local! {
use std::time::{Duration, Instant}; static PROFILER: RefCell<Box<dyn Profiler>> = RefCell::new(Box::new(DefaultProfiler));
}
/// Set the profiler for the current thread.
///
/// Returns the old profiler.
pub fn set_thread_profiler(new_profiler: Box<dyn Profiler>) -> Box<dyn Profiler> {
PROFILER.with(|profiler| std::mem::replace(&mut *profiler.borrow_mut(), new_profiler))
}
/// Start timing `pass` as a child of the currently running pass, if any.
///
/// This function is called by the publicly exposed pass functions.
fn start_pass(pass: Pass) -> Box<dyn Any> {
PROFILER.with(|profiler| profiler.borrow().start_pass(pass))
}
/// A timing token is responsible for timing the currently running pass. Timing starts when it /// A timing token is responsible for timing the currently running pass. Timing starts when it
/// is created and ends when it is dropped. /// is created and ends when it is dropped.
/// ///
/// Multiple passes can be active at the same time, but they must be started and stopped in a /// Multiple passes can be active at the same time, but they must be started and stopped in a
/// LIFO fashion. /// LIFO fashion.
pub struct TimingToken { struct DefaultTimingToken {
/// Start time for this pass. /// Start time for this pass.
start: Instant, start: Instant,
@@ -134,6 +162,14 @@ mod details {
} }
impl PassTimes { impl PassTimes {
/// Add `other` to the timings of this `PassTimes`.
pub fn add(&mut self, other: &Self) {
for (a, b) in self.pass.iter_mut().zip(&other.pass[..]) {
a.total += b.total;
a.child += b.child;
}
}
/// Returns the total amount of time taken by all the passes measured. /// Returns the total amount of time taken by all the passes measured.
pub fn total(&self) -> Duration { pub fn total(&self) -> Duration {
self.pass.iter().map(|p| p.total - p.child).sum() self.pass.iter().map(|p| p.total - p.child).sum()
@@ -183,21 +219,23 @@ mod details {
static PASS_TIME: RefCell<PassTimes> = RefCell::new(Default::default()); static PASS_TIME: RefCell<PassTimes> = RefCell::new(Default::default());
} }
/// Start timing `pass` as a child of the currently running pass, if any. /// The default profiler. You can get the results using [`take_current`].
/// pub struct DefaultProfiler;
/// This function is called by the publicly exposed pass functions.
pub(super) fn start_pass(pass: Pass) -> TimingToken { impl Profiler for DefaultProfiler {
fn start_pass(&self, pass: Pass) -> Box<dyn Any> {
let prev = CURRENT_PASS.with(|p| p.replace(pass)); let prev = CURRENT_PASS.with(|p| p.replace(pass));
log::debug!("timing: Starting {}, (during {})", pass, prev); log::debug!("timing: Starting {}, (during {})", pass, prev);
TimingToken { Box::new(DefaultTimingToken {
start: Instant::now(), start: Instant::now(),
pass, pass,
prev, prev,
})
} }
} }
/// Dropping a timing token indicated the end of the pass. /// Dropping a timing token indicated the end of the pass.
impl Drop for TimingToken { impl Drop for DefaultTimingToken {
fn drop(&mut self) { fn drop(&mut self) {
let duration = self.start.elapsed(); let duration = self.start.elapsed();
log::debug!("timing: Ending {}", self.pass); log::debug!("timing: Ending {}", self.pass);
@@ -214,40 +252,10 @@ mod details {
} }
/// Take the current accumulated pass timings and reset the timings for the current thread. /// Take the current accumulated pass timings and reset the timings for the current thread.
///
/// Only applies when [`DefaultProfiler`] is used.
pub fn take_current() -> PassTimes { pub fn take_current() -> PassTimes {
PASS_TIME.with(|rc| mem::replace(&mut *rc.borrow_mut(), Default::default())) PASS_TIME.with(|rc| mem::take(&mut *rc.borrow_mut()))
}
/// Add `timings` to the accumulated timings for the current thread.
pub fn add_to_current(times: &PassTimes) {
PASS_TIME.with(|rc| {
for (a, b) in rc.borrow_mut().pass.iter_mut().zip(&times.pass[..]) {
a.total += b.total;
a.child += b.child;
}
})
}
}
/// Dummy `debug` implementation
#[cfg(not(feature = "std"))]
mod details {
use super::Pass;
/// Dummy `TimingToken`
pub struct TimingToken;
/// Dummy `PassTimes`
pub struct PassTimes;
/// Returns dummy `PassTimes`
pub fn take_current() -> PassTimes {
PassTimes
}
/// does nothing
pub fn add_to_current(_times: PassTimes) {}
/// does nothing
pub(super) fn start_pass(_pass: Pass) -> TimingToken {
TimingToken
}
} }
#[cfg(test)] #[cfg(test)]

View File

@@ -82,14 +82,16 @@ impl ConcurrentRunner {
/// Join all the worker threads. /// Join all the worker threads.
/// Transfer pass timings from the worker threads to the current thread. /// Transfer pass timings from the worker threads to the current thread.
pub fn join(&mut self) { pub fn join(&mut self) -> timing::PassTimes {
assert!(self.request_tx.is_none(), "must shutdown before join"); assert!(self.request_tx.is_none(), "must shutdown before join");
let mut pass_times = timing::PassTimes::default();
for h in self.handles.drain(..) { for h in self.handles.drain(..) {
match h.join() { match h.join() {
Ok(t) => timing::add_to_current(&t), Ok(t) => pass_times.add(&t),
Err(e) => println!("worker panicked: {:?}", e), Err(e) => println!("worker panicked: {:?}", e),
} }
} }
pass_times
} }
/// Add a new job to the queues. /// Add a new job to the queues.

View File

@@ -24,10 +24,8 @@
pub use crate::function_runner::TestFileCompiler; pub use crate::function_runner::TestFileCompiler;
use crate::runner::TestRunner; use crate::runner::TestRunner;
use cranelift_codegen::timing;
use cranelift_reader::TestCommand; use cranelift_reader::TestCommand;
use std::path::Path; use std::path::Path;
use std::time;
mod concurrent; mod concurrent;
pub mod function_runner; pub mod function_runner;
@@ -63,7 +61,7 @@ mod test_wasm;
/// Directories are scanned recursively for test cases ending in `.clif`. These test cases are /// Directories are scanned recursively for test cases ending in `.clif`. These test cases are
/// executed on background threads. /// executed on background threads.
/// ///
pub fn run(verbose: bool, report_times: bool, files: &[String]) -> anyhow::Result<time::Duration> { pub fn run(verbose: bool, report_times: bool, files: &[String]) -> anyhow::Result<()> {
let mut runner = TestRunner::new(verbose, report_times); let mut runner = TestRunner::new(verbose, report_times);
for path in files.iter().map(Path::new) { for path in files.iter().map(Path::new) {
@@ -89,8 +87,8 @@ pub fn run_passes(
passes: &[String], passes: &[String],
target: &str, target: &str,
file: &str, file: &str,
) -> anyhow::Result<time::Duration> { ) -> anyhow::Result<()> {
let mut runner = TestRunner::new(verbose, /* report_times */ false); let mut runner = TestRunner::new(verbose, report_times);
let path = Path::new(file); let path = Path::new(file);
if path == Path::new("-") || path.is_file() { if path == Path::new("-") || path.is_file() {
@@ -99,11 +97,8 @@ pub fn run_passes(
runner.push_dir(path); runner.push_dir(path);
} }
let result = runner.run_passes(passes, target); runner.start_threads();
if report_times { runner.run_passes(passes, target)
println!("{}", timing::take_current());
}
result
} }
/// Create a new subcommand trait object to match `parsed.command`. /// Create a new subcommand trait object to match `parsed.command`.

View File

@@ -5,7 +5,6 @@
use crate::concurrent::{ConcurrentRunner, Reply}; use crate::concurrent::{ConcurrentRunner, Reply};
use crate::runone; use crate::runone;
use cranelift_codegen::timing;
use std::error::Error; use std::error::Error;
use std::ffi::OsStr; use std::ffi::OsStr;
use std::fmt::{self, Display}; use std::fmt::{self, Display};
@@ -298,9 +297,9 @@ impl TestRunner {
None => break, None => break,
} }
} }
conc.join(); let pass_times = conc.join();
if self.report_times { if self.report_times {
println!("{}", timing::take_current()); println!("{}", pass_times);
} }
} }
} }
@@ -355,8 +354,7 @@ impl TestRunner {
} }
/// Scan pushed directories for tests and run them. /// Scan pushed directories for tests and run them.
pub fn run(&mut self) -> anyhow::Result<time::Duration> { pub fn run(&mut self) -> anyhow::Result<()> {
let started = time::Instant::now();
self.scan_dirs(IsPass::NotPass); self.scan_dirs(IsPass::NotPass);
self.schedule_jobs(); self.schedule_jobs();
self.report_slow_tests(); self.report_slow_tests();
@@ -364,26 +362,22 @@ impl TestRunner {
println!("{} tests", self.tests.len()); println!("{} tests", self.tests.len());
match self.errors { match self.errors {
0 => Ok(started.elapsed()), 0 => Ok(()),
1 => anyhow::bail!("1 failure"), 1 => anyhow::bail!("1 failure"),
n => anyhow::bail!("{} failures", n), n => anyhow::bail!("{} failures", n),
} }
} }
/// Scan pushed directories for tests and run specified passes from commandline on them. /// Scan pushed directories for tests and run specified passes from commandline on them.
pub fn run_passes( pub fn run_passes(&mut self, passes: &[String], target: &str) -> anyhow::Result<()> {
&mut self,
passes: &[String],
target: &str,
) -> anyhow::Result<time::Duration> {
let started = time::Instant::now();
self.scan_dirs(IsPass::Pass); self.scan_dirs(IsPass::Pass);
self.schedule_pass_job(passes, target); self.schedule_pass_job(passes, target);
self.report_slow_tests(); self.report_slow_tests();
self.drain_threads();
println!("{} tests", self.tests.len()); println!("{} tests", self.tests.len());
match self.errors { match self.errors {
0 => Ok(started.elapsed()), 0 => Ok(()),
1 => anyhow::bail!("1 failure"), 1 => anyhow::bail!("1 failure"),
n => anyhow::bail!("{} failures", n), n => anyhow::bail!("{} failures", n),
} }