diff --git a/cranelift/src/cton-util.rs b/cranelift/src/cton-util.rs index b08fa8cde3..fe121d0055 100644 --- a/cranelift/src/cton-util.rs +++ b/cranelift/src/cton-util.rs @@ -10,7 +10,7 @@ extern crate num_cpus; extern crate tempdir; extern crate term; -use cretonne::VERSION; +use cretonne::{VERSION, timing}; use docopt::Docopt; use std::io::{self, Write}; use std::process; @@ -27,16 +27,18 @@ const USAGE: &str = " Cretonne code generator utility Usage: - cton-util test [-v] ... + cton-util test [-vT] ... cton-util cat ... cton-util filecheck [-v] cton-util print-cfg ... - cton-util compile [-vp] [--set ]... [--isa ] ... - cton-util wasm [-ctvp] [--set ]... [--isa ] ... + cton-util compile [-vpT] [--set ]... [--isa ] ... + cton-util wasm [-ctvpT] [--set ]... [--isa ] ... cton-util --help | --version Options: -v, --verbose be more verbose + -T, --time-passes + print pass timing report -t, --just-decode just decode WebAssembly to Cretonne IL -c, --check-translation @@ -64,6 +66,7 @@ struct Args { flag_verbose: bool, flag_set: Vec, flag_isa: String, + flag_time_passes: bool, } /// A command either succeeds or fails with an error message. @@ -81,7 +84,7 @@ fn cton_util() -> CommandResult { .unwrap_or_else(|e| e.exit()); // Find the sub-command to execute. - if args.cmd_test { + let result = if args.cmd_test { filetest::run(args.flag_verbose, args.arg_file) } else if args.cmd_cat { cat::run(args.arg_file) @@ -104,7 +107,13 @@ fn cton_util() -> CommandResult { } else { // Debugging / shouldn't happen with proper command line handling above. Err(format!("Unhandled args: {:?}", args)) + }; + + if args.flag_time_passes { + print!("{}", timing::take_current()); } + + result } fn main() { diff --git a/cranelift/src/filetest/concurrent.rs b/cranelift/src/filetest/concurrent.rs index 4f7cca6935..cb188577e0 100644 --- a/cranelift/src/filetest/concurrent.rs +++ b/cranelift/src/filetest/concurrent.rs @@ -3,6 +3,7 @@ //! This module provides the `ConcurrentRunner` struct which uses a pool of threads to run tests //! concurrently. +use cretonne::timing; use std::panic::catch_unwind; use std::path::{Path, PathBuf}; use std::sync::mpsc::{channel, Sender, Receiver}; @@ -33,7 +34,7 @@ pub struct ConcurrentRunner { // Workers have their own `Sender`. reply_rx: Receiver, - handles: Vec>, + handles: Vec>, } impl ConcurrentRunner { @@ -64,11 +65,13 @@ impl ConcurrentRunner { } /// Join all the worker threads. + /// Transfer pass timings from the worker threads to the current thread. pub fn join(&mut self) { assert!(self.request_tx.is_none(), "must shutdown before join"); for h in self.handles.drain(..) { - if let Err(e) = h.join() { - println!("worker panicked: {:?}", e); + match h.join() { + Ok(t) => timing::add_to_current(t), + Err(e) => println!("worker panicked: {:?}", e), } } } @@ -109,7 +112,7 @@ fn worker_thread( thread_num: usize, requests: Arc>>, replies: Sender, -) -> thread::JoinHandle<()> { +) -> thread::JoinHandle { thread::Builder::new() .name(format!("worker #{}", thread_num)) .spawn(move || { @@ -142,6 +145,10 @@ fn worker_thread( replies.send(Reply::Done { jobid, result }).unwrap(); } + + // Timing is accumulated independently per thread. + // Timings from this worker thread will be aggregated by `ConcurrentRunner::join()`. + timing::take_current() }) .unwrap() } diff --git a/cranelift/src/filetest/runone.rs b/cranelift/src/filetest/runone.rs index 67ea25b08d..1dc8eb9849 100644 --- a/cranelift/src/filetest/runone.rs +++ b/cranelift/src/filetest/runone.rs @@ -6,6 +6,7 @@ use std::time; use cretonne::ir::Function; use cretonne::isa::TargetIsa; use cretonne::settings::Flags; +use cretonne::timing; use cretonne::verify_function; use cton_reader::parse_test; use cton_reader::IsaSpec; @@ -17,6 +18,7 @@ use filetest::subtest::{SubTest, Context, Result}; /// /// If running this test causes a panic, it will propagate as normal. pub fn run(path: &Path) -> TestResult { + let _tt = timing::process_file(); dbg!("---\nFile: {}", path.to_string_lossy()); let started = time::Instant::now(); let buffer = read_to_string(path).map_err(|e| e.to_string())?; @@ -71,7 +73,6 @@ pub fn run(path: &Path) -> TestResult { } - // TODO: Actually run the tests. Ok(started.elapsed()) } diff --git a/lib/cretonne/src/context.rs b/lib/cretonne/src/context.rs index aa900ff9e1..221481a039 100644 --- a/lib/cretonne/src/context.rs +++ b/lib/cretonne/src/context.rs @@ -23,6 +23,7 @@ use unreachable_code::eliminate_unreachable_code; use verifier; use simple_gvn::do_simple_gvn; use licm::do_licm; +use timing; /// Persistent data structures and compilation pipeline. pub struct Context { @@ -74,6 +75,7 @@ impl Context { /// /// Returns the size of the function's code. pub fn compile(&mut self, isa: &TargetIsa) -> Result { + let _tt = timing::compile(); self.verify_if(isa)?; self.compute_cfg(); @@ -100,6 +102,7 @@ impl Context { /// /// The machine code is not relocated. Instead, any relocations are emitted into `relocs`. pub fn emit_to_memory(&self, mem: *mut u8, relocs: &mut RelocSink, isa: &TargetIsa) { + let _tt = timing::binemit(); isa.emit_function(&self.func, &mut MemoryCodeSink::new(mem, relocs)); } diff --git a/lib/cretonne/src/dominator_tree.rs b/lib/cretonne/src/dominator_tree.rs index d9f2362e3b..0da024f36d 100644 --- a/lib/cretonne/src/dominator_tree.rs +++ b/lib/cretonne/src/dominator_tree.rs @@ -5,6 +5,7 @@ use flowgraph::{ControlFlowGraph, BasicBlock}; use ir::{Ebb, Inst, Function, Layout, ProgramOrder, ExpandedProgramPoint}; use ir::instructions::BranchInfo; use packed_option::PackedOption; +use timing; use std::cmp::Ordering; @@ -227,6 +228,7 @@ impl DominatorTree { /// Reset and compute a CFG post-order and dominator tree. pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) { + let _tt = timing::domtree(); debug_assert!(cfg.is_valid()); self.compute_postorder(func); self.compute_domtree(func, cfg); diff --git a/lib/cretonne/src/flowgraph.rs b/lib/cretonne/src/flowgraph.rs index 10eff77857..d1e45312ac 100644 --- a/lib/cretonne/src/flowgraph.rs +++ b/lib/cretonne/src/flowgraph.rs @@ -28,13 +28,14 @@ use ir::{Function, Inst, Ebb}; use ir::instructions::BranchInfo; use entity::EntityMap; use std::mem; +use timing; /// A basic block denoted by its enclosing Ebb and last instruction. pub type BasicBlock = (Ebb, Inst); /// A container for the successors and predecessors of some Ebb. #[derive(Clone, Default)] -pub struct CFGNode { +struct CFGNode { /// Instructions that can branch or jump to this EBB. /// /// This maps branch instruction -> predecessor EBB which is redundant since the EBB containing @@ -94,6 +95,7 @@ impl ControlFlowGraph { /// /// This will clear and overwrite any information already stored in this data structure. pub fn compute(&mut self, func: &Function) { + let _tt = timing::flowgraph(); self.clear(); self.data.resize(func.dfg.num_ebbs()); diff --git a/lib/cretonne/src/isa/intel/mod.rs b/lib/cretonne/src/isa/intel/mod.rs index 96048c5fbd..b4ca044c2f 100644 --- a/lib/cretonne/src/isa/intel/mod.rs +++ b/lib/cretonne/src/isa/intel/mod.rs @@ -14,6 +14,7 @@ use isa::{TargetIsa, RegInfo, RegClass, EncInfo}; use ir; use regalloc; use result; +use timing; #[allow(dead_code)] @@ -115,6 +116,7 @@ impl TargetIsa for Isa { } fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult { + let _tt = timing::prologue_epilogue(); abi::prologue_epilogue(func, self) } } diff --git a/lib/cretonne/src/isa/mod.rs b/lib/cretonne/src/isa/mod.rs index e2f28ed119..ccfced715e 100644 --- a/lib/cretonne/src/isa/mod.rs +++ b/lib/cretonne/src/isa/mod.rs @@ -51,6 +51,7 @@ use settings; use ir; use regalloc; use result; +use timing; use isa::enc_tables::Encodings; #[cfg(build_riscv)] @@ -236,6 +237,7 @@ pub trait TargetIsa { /// /// Return an error if the stack frame is too large. fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult { + let _tt = timing::prologue_epilogue(); // This default implementation is unlikely to be good enough. use stack_layout::layout_stack; use ir::stackslot::{StackSize, StackOffset}; diff --git a/lib/cretonne/src/legalizer/mod.rs b/lib/cretonne/src/legalizer/mod.rs index 4a6ff6c337..2781f5fe84 100644 --- a/lib/cretonne/src/legalizer/mod.rs +++ b/lib/cretonne/src/legalizer/mod.rs @@ -18,6 +18,7 @@ use flowgraph::ControlFlowGraph; use ir::{self, InstBuilder}; use isa::TargetIsa; use bitset::BitSet; +use timing; mod boundary; mod globalvar; @@ -33,6 +34,7 @@ use self::heap::expand_heap_addr; /// - Fill out `func.encodings`. /// pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &TargetIsa) { + let _tt = timing::legalize(); debug_assert!(cfg.is_valid()); boundary::legalize_signatures(func, isa); diff --git a/lib/cretonne/src/lib.rs b/lib/cretonne/src/lib.rs index 5de3c1dba3..9abffe0abc 100644 --- a/lib/cretonne/src/lib.rs +++ b/lib/cretonne/src/lib.rs @@ -28,6 +28,7 @@ pub mod packed_option; pub mod regalloc; pub mod result; pub mod settings; +pub mod timing; pub mod verifier; mod abi; diff --git a/lib/cretonne/src/licm.rs b/lib/cretonne/src/licm.rs index 628f8e0aaa..1eb035d9bf 100644 --- a/lib/cretonne/src/licm.rs +++ b/lib/cretonne/src/licm.rs @@ -7,6 +7,7 @@ use std::collections::HashSet; use dominator_tree::DominatorTree; use entity::{EntityList, ListPool}; use loop_analysis::{Loop, LoopAnalysis}; +use timing; /// Performs the LICM pass by detecting loops within the CFG and moving /// loop-invariant instructions out of them. @@ -17,6 +18,7 @@ pub fn do_licm( domtree: &mut DominatorTree, loop_analysis: &mut LoopAnalysis, ) { + let _tt = timing::licm(); debug_assert!(cfg.is_valid()); debug_assert!(domtree.is_valid()); debug_assert!(loop_analysis.is_valid()); diff --git a/lib/cretonne/src/loop_analysis.rs b/lib/cretonne/src/loop_analysis.rs index 173ceb46ee..f6dee54d71 100644 --- a/lib/cretonne/src/loop_analysis.rs +++ b/lib/cretonne/src/loop_analysis.rs @@ -7,6 +7,7 @@ use entity::EntityMap; use flowgraph::ControlFlowGraph; use ir::{Function, Ebb, Layout}; use packed_option::PackedOption; +use timing; /// A opaque reference to a code loop. #[derive(Copy, Clone, PartialEq, Eq, Hash)] @@ -98,6 +99,7 @@ impl LoopAnalysis { impl LoopAnalysis { /// Detects the loops in a function. Needs the control flow graph and the dominator tree. pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph, domtree: &DominatorTree) { + let _tt = timing::loop_analysis(); self.loops.clear(); self.ebb_loop_map.clear(); self.ebb_loop_map.resize(func.dfg.num_ebbs()); diff --git a/lib/cretonne/src/regalloc/coalescing.rs b/lib/cretonne/src/regalloc/coalescing.rs index 31228f680a..3ce58821bc 100644 --- a/lib/cretonne/src/regalloc/coalescing.rs +++ b/lib/cretonne/src/regalloc/coalescing.rs @@ -18,6 +18,7 @@ use std::cmp::Ordering; use std::iter::Peekable; use std::mem; use isa::{TargetIsa, EncInfo}; +use timing; /// Dominator forest. /// @@ -282,6 +283,7 @@ impl Coalescing { liveness: &mut Liveness, virtregs: &mut VirtRegs, ) { + let _tt = timing::ra_cssa(); dbg!("Coalescing for:\n{}", func.display(isa)); let mut context = Context { isa, diff --git a/lib/cretonne/src/regalloc/coloring.rs b/lib/cretonne/src/regalloc/coloring.rs index 25b271d17e..2f52dfaabc 100644 --- a/lib/cretonne/src/regalloc/coloring.rs +++ b/lib/cretonne/src/regalloc/coloring.rs @@ -57,6 +57,7 @@ use regalloc::liveness::Liveness; use regalloc::liverange::{LiveRange, LiveRangeContext}; use regalloc::solver::{Solver, SolverError}; use std::mem; +use timing; /// Data structures for the coloring pass. @@ -123,6 +124,7 @@ impl Coloring { liveness: &mut Liveness, tracker: &mut LiveValueTracker, ) { + let _tt = timing::ra_coloring(); dbg!("Coloring for:\n{}", func.display(isa)); let mut ctx = Context { usable_regs: isa.allocatable_registers(func), diff --git a/lib/cretonne/src/regalloc/context.rs b/lib/cretonne/src/regalloc/context.rs index d101cce30b..765ecf155c 100644 --- a/lib/cretonne/src/regalloc/context.rs +++ b/lib/cretonne/src/regalloc/context.rs @@ -16,6 +16,7 @@ use regalloc::reload::Reload; use regalloc::spilling::Spilling; use regalloc::virtregs::VirtRegs; use result::CtonResult; +use timing; use topo_order::TopoOrder; use verifier::{verify_context, verify_liveness, verify_cssa, verify_locations}; @@ -72,6 +73,7 @@ impl Context { cfg: &ControlFlowGraph, domtree: &mut DominatorTree, ) -> CtonResult { + let _tt = timing::regalloc(); debug_assert!(domtree.is_valid()); // `Liveness` and `Coloring` are self-clearing. diff --git a/lib/cretonne/src/regalloc/liveness.rs b/lib/cretonne/src/regalloc/liveness.rs index 20208e36a1..6eee4f0c8c 100644 --- a/lib/cretonne/src/regalloc/liveness.rs +++ b/lib/cretonne/src/regalloc/liveness.rs @@ -184,6 +184,7 @@ use regalloc::affinity::Affinity; use regalloc::liverange::{LiveRange, LiveRangeForest, LiveRangeContext}; use std::mem; use std::ops::Index; +use timing; /// A set of live ranges, indexed by value number. type LiveRangeSet = SparseMap; @@ -385,6 +386,7 @@ impl Liveness { /// Compute the live ranges of all SSA values used in `func`. /// This clears out any existing analysis stored in this data structure. pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) { + let _tt = timing::ra_liveness(); self.ranges.clear(); // Get ISA data structures used for computing live range affinities. diff --git a/lib/cretonne/src/regalloc/reload.rs b/lib/cretonne/src/regalloc/reload.rs index aa43d6771a..ab9798d4c9 100644 --- a/lib/cretonne/src/regalloc/reload.rs +++ b/lib/cretonne/src/regalloc/reload.rs @@ -19,6 +19,7 @@ use isa::{TargetIsa, Encoding, EncInfo, RecipeConstraints, ConstraintKind}; use regalloc::affinity::Affinity; use regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; use regalloc::liveness::Liveness; +use timing; use topo_order::TopoOrder; /// Reusable data structures for the reload pass. @@ -69,6 +70,7 @@ impl Reload { topo: &mut TopoOrder, tracker: &mut LiveValueTracker, ) { + let _tt = timing::ra_reload(); dbg!("Reload for:\n{}", func.display(isa)); let mut ctx = Context { cur: EncCursor::new(func, isa), diff --git a/lib/cretonne/src/regalloc/spilling.rs b/lib/cretonne/src/regalloc/spilling.rs index 0bbb4c4225..6eef88141d 100644 --- a/lib/cretonne/src/regalloc/spilling.rs +++ b/lib/cretonne/src/regalloc/spilling.rs @@ -26,6 +26,7 @@ use regalloc::liveness::Liveness; use regalloc::pressure::Pressure; use regalloc::virtregs::VirtRegs; use std::fmt; +use timing; use topo_order::TopoOrder; /// Persistent data structures for the spilling pass. @@ -87,6 +88,7 @@ impl Spilling { topo: &mut TopoOrder, tracker: &mut LiveValueTracker, ) { + let _tt = timing::ra_spilling(); dbg!("Spilling for:\n{}", func.display(isa)); let reginfo = isa.register_info(); let usable_regs = isa.allocatable_registers(func); diff --git a/lib/cretonne/src/simple_gvn.rs b/lib/cretonne/src/simple_gvn.rs index 708a287904..8642122593 100644 --- a/lib/cretonne/src/simple_gvn.rs +++ b/lib/cretonne/src/simple_gvn.rs @@ -5,6 +5,7 @@ use flowgraph::ControlFlowGraph; use dominator_tree::DominatorTree; use ir::{InstructionData, Function, Inst, Opcode, Type}; use scoped_hash_map::ScopedHashMap; +use timing; /// Test whether the given opcode is unsafe to even consider for GVN. fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool { @@ -16,6 +17,7 @@ fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool { /// Perform simple GVN on `func`. /// pub fn do_simple_gvn(func: &mut Function, cfg: &mut ControlFlowGraph, domtree: &mut DominatorTree) { + let _tt = timing::gvn(); debug_assert!(cfg.is_valid()); debug_assert!(domtree.is_valid()); diff --git a/lib/cretonne/src/timing.rs b/lib/cretonne/src/timing.rs new file mode 100644 index 0000000000..e1855cf65a --- /dev/null +++ b/lib/cretonne/src/timing.rs @@ -0,0 +1,225 @@ +//! Pass timing. +//! +//! This modules provides facilities for timing the execution of individual compilation passes. + +use std::fmt; + +pub use self::details::{TimingToken, PassTimes, take_current, add_to_current}; + +// Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a +// snake_case name and a plain text description used when printing out the timing report. +// +// This macro defines: +// +// - A C-style enum containing all the pass names and a `NoPass` variant. +// - A usize constant with the number of defined passes. +// - A const array of pass descriptions. +// - A public function per pass used to start the timing of that pass. +macro_rules! define_passes { + { $enum:ident, $num_passes:ident, $descriptions:ident; + $($pass:ident: $desc:expr,)+ + } => { + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + enum $enum { $($pass,)+ NoPass } + + const $num_passes: usize = $enum::NoPass as usize; + + const $descriptions: [&str; $num_passes] = [ $($desc),+ ]; + + $( + #[doc=$desc] + pub fn $pass() -> TimingToken { + details::start_pass($enum::$pass) + } + )+ + } +} + +// Pass definitions. +define_passes!{ + Pass, NUM_PASSES, DESCRIPTIONS; + + process_file: "Processing test file", + parse_text: "Parsing textual Cretonne IL", + wasm_translate_module: "Translate WASM module", + wasm_translate_function: "Translate WASM function", + + verifier: "Verify Cretonne IL", + verify_cssa: "Verify CSSA", + verify_liveness: "Verify live ranges", + verify_locations: "Verify value locations", + verify_flags: "Verify CPU flags", + + compile: "Compilation passes", + flowgraph: "Control flow graph", + domtree: "Dominator tree", + loop_analysis: "Loop analysis", + legalize: "Legalization", + gvn: "Global value numbering", + licm: "Loop invariant code motion", + unreachable_code: "Remove unreachable blocks", + + regalloc: "Register allocation", + ra_liveness: "RA liveness analysis", + ra_cssa: "RA coalescing CSSA", + ra_spilling: "RA spilling", + ra_reload: "RA reloading", + ra_coloring: "RA coloring", + + prologue_epilogue: "Prologue/epilogue insertion", + binemit: "Binary machine code emission", +} + +impl Pass { + pub fn idx(self) -> usize { + self as usize + } +} + +impl fmt::Display for Pass { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match DESCRIPTIONS.get(self.idx()) { + Some(s) => f.write_str(s), + None => f.write_str(""), + } + } +} + + +/// Implementation details. +/// +/// This whole module can be gated on a `cfg` feature to provide a dummy implementation for +/// performance-sensitive builds or restricted environments. The dummy implementation must provide +/// `TimingToken` and `PassTimings` types and a `take_current` function. +mod details { + use super::{Pass, NUM_PASSES, DESCRIPTIONS}; + use std::cell::{Cell, RefCell}; + use std::fmt; + use std::mem; + use std::time::{Instant, Duration}; + + /// A timing token is responsible for timing the currently running pass. Timing starts when it + /// is created and ends when it is dropped. + /// + /// Multiple passes can be active at the same time, but they must be started and stopped in a + /// LIFO fashion. + pub struct TimingToken { + /// Start time for this pass. + start: Instant, + + // Pass being timed by this token. + pass: Pass, + + // The previously active pass which will be restored when this token is dropped. + prev: Pass, + } + + /// Accumulated timing information for a single pass. + #[derive(Default)] + struct PassTime { + /// Total time spent running this pas including children. + total: Duration, + + /// Time spent running in child passes. + child: Duration, + } + + /// Accumulated timing for all passes. + #[derive(Default)] + pub struct PassTimes { + pass: [PassTime; NUM_PASSES], + } + + impl fmt::Display for PassTimes { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + writeln!(f, "======= ======= ==================================")?; + writeln!(f, " Total Self Pass")?; + writeln!(f, "------- ------- ----------------------------------")?; + for (time, desc) in self.pass.iter().zip(&DESCRIPTIONS) { + // Omit passes that haven't run. + if time.total == Duration::default() { + continue; + } + + // Write a duration as secs.milis, trailing space. + fn fmtdur(mut dur: Duration, f: &mut fmt::Formatter) -> fmt::Result { + // Round to nearest ms by adding 500us. + dur += Duration::new(0, 500_000); + let ms = dur.subsec_nanos() / 1_000_000; + write!(f, "{:3}.{:03} ", dur.as_secs(), ms) + } + + fmtdur(time.total, f)?; + if let Some(s) = time.total.checked_sub(time.child) { + fmtdur(s, f)?; + } + writeln!(f, " {}", desc)?; + } + writeln!(f, "======= ======= ==================================") + } + } + + /// Information about passes in a single thread. + thread_local!{ + static CURRENT_PASS: Cell = Cell::new(Pass::NoPass); + static PASS_TIME: RefCell = RefCell::new(Default::default()); + } + + /// Start timing `pass` as a child of the currently running pass, if any. + /// + /// This function is called by the publicly exposed pass functions. + pub(super) fn start_pass(pass: Pass) -> TimingToken { + let prev = CURRENT_PASS.with(|p| p.replace(pass)); + dbg!("timing: Starting {}, (during {})", pass, prev); + TimingToken { + start: Instant::now(), + pass, + prev, + } + } + + /// Dropping a timing token indicated the end of the pass. + impl Drop for TimingToken { + fn drop(&mut self) { + let duration = self.start.elapsed(); + dbg!("timing: Ending {}", self.pass); + let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev)); + assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order"); + PASS_TIME.with(|rc| { + let mut table = rc.borrow_mut(); + table.pass[self.pass.idx()].total += duration; + if let Some(parent) = table.pass.get_mut(self.prev.idx()) { + parent.child += duration; + } + }) + } + } + + /// Take the current accumulated pass timings and reset the timings for the current thread. + pub fn take_current() -> PassTimes { + PASS_TIME.with(|rc| mem::replace(&mut *rc.borrow_mut(), Default::default())) + } + + /// Add `timings` to the accumulated timings for the current thread. + pub fn add_to_current(times: PassTimes) { + PASS_TIME.with(|rc| for (a, b) in rc.borrow_mut().pass.iter_mut().zip( + ×.pass, + ) + { + a.total += b.total; + a.child += b.child; + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn display() { + assert_eq!(Pass::NoPass.to_string(), ""); + assert_eq!(Pass::regalloc.to_string(), "Register allocation"); + } +} diff --git a/lib/cretonne/src/unreachable_code.rs b/lib/cretonne/src/unreachable_code.rs index f5d49c77f9..36ec7e673c 100644 --- a/lib/cretonne/src/unreachable_code.rs +++ b/lib/cretonne/src/unreachable_code.rs @@ -4,6 +4,7 @@ use cursor::{Cursor, FuncCursor}; use dominator_tree::DominatorTree; use flowgraph::ControlFlowGraph; use ir; +use timing; /// Eliminate unreachable code. /// @@ -16,6 +17,7 @@ pub fn eliminate_unreachable_code( cfg: &mut ControlFlowGraph, domtree: &DominatorTree, ) { + let _tt = timing::unreachable_code(); let mut pos = FuncCursor::new(func); while let Some(ebb) = pos.next_ebb() { if domtree.is_reachable(ebb) { diff --git a/lib/cretonne/src/verifier/cssa.rs b/lib/cretonne/src/verifier/cssa.rs index 853f27251c..935279c7f1 100644 --- a/lib/cretonne/src/verifier/cssa.rs +++ b/lib/cretonne/src/verifier/cssa.rs @@ -6,6 +6,7 @@ use ir::Function; use regalloc::liveness::Liveness; use regalloc::virtregs::VirtRegs; use std::cmp::Ordering; +use timing; use verifier::Result; /// Verify conventional SSA form for `func`. @@ -29,6 +30,7 @@ pub fn verify_cssa( liveness: &Liveness, virtregs: &VirtRegs, ) -> Result { + let _tt = timing::verify_cssa(); let verifier = CssaVerifier { func, cfg, diff --git a/lib/cretonne/src/verifier/flags.rs b/lib/cretonne/src/verifier/flags.rs index f007844b51..da47fdfcf8 100644 --- a/lib/cretonne/src/verifier/flags.rs +++ b/lib/cretonne/src/verifier/flags.rs @@ -8,6 +8,7 @@ use isa; use packed_option::PackedOption; use std::result; use verifier::{Result, Error}; +use timing; /// Verify that CPU flags are used correctly. /// @@ -26,6 +27,7 @@ pub fn verify_flags( cfg: &ControlFlowGraph, isa: Option<&isa::TargetIsa>, ) -> Result { + let _tt = timing::verify_flags(); let mut verifier = FlagsVerifier { func, cfg, diff --git a/lib/cretonne/src/verifier/liveness.rs b/lib/cretonne/src/verifier/liveness.rs index 78eb3e17ca..a58d04caf7 100644 --- a/lib/cretonne/src/verifier/liveness.rs +++ b/lib/cretonne/src/verifier/liveness.rs @@ -8,6 +8,7 @@ use regalloc::liveness::Liveness; use regalloc::liverange::LiveRange; use std::cmp::Ordering; use verifier::Result; +use timing; /// Verify liveness information for `func`. /// @@ -27,6 +28,7 @@ pub fn verify_liveness( cfg: &ControlFlowGraph, liveness: &Liveness, ) -> Result { + let _tt = timing::verify_liveness(); let verifier = LivenessVerifier { isa, func, diff --git a/lib/cretonne/src/verifier/locations.rs b/lib/cretonne/src/verifier/locations.rs index 8df40ba924..2c93d0de3e 100644 --- a/lib/cretonne/src/verifier/locations.rs +++ b/lib/cretonne/src/verifier/locations.rs @@ -5,6 +5,7 @@ use isa; use regalloc::RegDiversions; use regalloc::liveness::Liveness; use verifier::Result; +use timing; /// Verify value locations for `func`. /// @@ -22,6 +23,7 @@ pub fn verify_locations( func: &ir::Function, liveness: Option<&Liveness>, ) -> Result { + let _tt = timing::verify_locations(); let verifier = LocationVerifier { isa, func, diff --git a/lib/cretonne/src/verifier/mod.rs b/lib/cretonne/src/verifier/mod.rs index 259cf48a5e..ef534bcd87 100644 --- a/lib/cretonne/src/verifier/mod.rs +++ b/lib/cretonne/src/verifier/mod.rs @@ -73,6 +73,7 @@ use std::collections::BTreeSet; use std::error as std_error; use std::fmt::{self, Display, Formatter, Write}; use std::result; +use timing; pub use self::cssa::verify_cssa; pub use self::liveness::verify_liveness; @@ -126,6 +127,7 @@ pub type Result = result::Result<(), Error>; /// Verify `func`. pub fn verify_function<'a, FOI: Into>>(func: &Function, fisa: FOI) -> Result { + let _tt = timing::verifier(); Verifier::new(func, fisa.into()).run() } @@ -137,6 +139,7 @@ pub fn verify_context<'a, FOI: Into>>( domtree: &DominatorTree, fisa: FOI, ) -> Result { + let _tt = timing::verifier(); let verifier = Verifier::new(func, fisa.into()); if cfg.is_valid() { verifier.cfg_integrity(cfg)?; diff --git a/lib/reader/src/parser.rs b/lib/reader/src/parser.rs index ac25f1e6e6..c897d68a3f 100644 --- a/lib/reader/src/parser.rs +++ b/lib/reader/src/parser.rs @@ -14,7 +14,7 @@ use cretonne::ir::immediates::{Imm64, Uimm32, Offset32, Ieee32, Ieee64}; use cretonne::ir::entities::AnyEntity; use cretonne::ir::instructions::{InstructionFormat, InstructionData, VariableArgs}; use cretonne::isa::{self, TargetIsa, Encoding, RegUnit}; -use cretonne::settings; +use cretonne::{settings, timing}; use testfile::{TestFile, Details, Comment}; use error::{Location, Error, Result}; use lexer::{self, Lexer, Token}; @@ -26,6 +26,7 @@ use sourcemap::{SourceMap, MutableSourceMap}; /// /// Any test commands or ISA declarations are ignored. pub fn parse_functions(text: &str) -> Result> { + let _tt = timing::parse_text(); parse_test(text).map(|file| { file.functions.into_iter().map(|(func, _)| func).collect() }) @@ -35,6 +36,7 @@ pub fn parse_functions(text: &str) -> Result> { /// /// The returned `TestFile` contains direct references to substrings of `text`. pub fn parse_test<'a>(text: &'a str) -> Result> { + let _tt = timing::parse_text(); let mut parser = Parser::new(text); // Gather the preamble comments as 'Function'. parser.gather_comments(AnyEntity::Function); diff --git a/lib/wasm/src/func_translator.rs b/lib/wasm/src/func_translator.rs index f68996d9b6..36c1b58a17 100644 --- a/lib/wasm/src/func_translator.rs +++ b/lib/wasm/src/func_translator.rs @@ -8,6 +8,7 @@ use code_translator::translate_operator; use cretonne::entity::EntityRef; use cretonne::ir::{self, InstBuilder, Ebb}; use cretonne::result::{CtonResult, CtonError}; +use cretonne::timing; use cton_frontend::{ILBuilder, FunctionBuilder}; use environ::FuncEnvironment; use state::TranslationState; @@ -66,6 +67,7 @@ impl FuncTranslator { func: &mut ir::Function, environ: &mut FE, ) -> CtonResult { + let _tt = timing::wasm_translate_function(); dbg!( "translate({} bytes, {}{})", reader.bytes_remaining(), diff --git a/lib/wasm/src/module_translator.rs b/lib/wasm/src/module_translator.rs index 87e38a119c..d75f60a35e 100644 --- a/lib/wasm/src/module_translator.rs +++ b/lib/wasm/src/module_translator.rs @@ -1,5 +1,6 @@ //! Translation skeletton that traverses the whole WebAssembly module and call helper functions //! to deal with each part of it. +use cretonne::timing; use wasmparser::{ParserState, SectionCode, ParserInput, Parser, WasmDecoder, BinaryReaderError}; use sections_translator::{SectionParsingError, parse_function_signatures, parse_import_section, parse_function_section, parse_export_section, parse_start_section, @@ -15,6 +16,7 @@ pub fn translate_module<'data>( data: &'data [u8], environ: &mut ModuleEnvironment<'data>, ) -> Result<(), String> { + let _tt = timing::wasm_translate_module(); let mut parser = Parser::new(data); match *parser.read() { ParserState::BeginWasm { .. } => {}