Add a compilation pass timing facility.

Individual compilation passes call the corresponding timing::*() function and hold on to their timing token while they run. This causes nested per-pass timing information to be recorded in thread-local storage. The --time-passes command line option prints a pass timing report to stdout.
2017-12-06 10:56:17 -08:00
parent feaea238bc
commit 60c456c1ec
29 changed files with 305 additions and 12 deletions
--- a/cranelift/src/cton-util.rs
+++ b/cranelift/src/cton-util.rs
@@ -10,7 +10,7 @@ extern crate num_cpus;
 extern crate tempdir;
 extern crate term;

-use cretonne::VERSION;
+use cretonne::{VERSION, timing};
 use docopt::Docopt;
 use std::io::{self, Write};
 use std::process;
@@ -27,16 +27,18 @@ const USAGE: &str = "
 Cretonne code generator utility

 Usage:
-    cton-util test [-v] <file>...
+    cton-util test [-vT] <file>...
    cton-util cat <file>...
    cton-util filecheck [-v] <file>
    cton-util print-cfg <file>...
-    cton-util compile [-vp] [--set <set>]... [--isa <isa>] <file>...
-    cton-util wasm [-ctvp] [--set <set>]... [--isa <isa>] <file>...
+    cton-util compile [-vpT] [--set <set>]... [--isa <isa>] <file>...
+    cton-util wasm [-ctvpT] [--set <set>]... [--isa <isa>] <file>...
    cton-util --help | --version

 Options:
    -v, --verbose   be more verbose
+    -T, --time-passes
+                    print pass timing report
    -t, --just-decode
                    just decode WebAssembly to Cretonne IL
    -c, --check-translation
@@ -64,6 +66,7 @@ struct Args {
    flag_verbose: bool,
    flag_set: Vec<String>,
    flag_isa: String,
+    flag_time_passes: bool,
 }

 /// A command either succeeds or fails with an error message.
@@ -81,7 +84,7 @@ fn cton_util() -> CommandResult {
        .unwrap_or_else(|e| e.exit());

    // Find the sub-command to execute.
-    if args.cmd_test {
+    let result = if args.cmd_test {
        filetest::run(args.flag_verbose, args.arg_file)
    } else if args.cmd_cat {
        cat::run(args.arg_file)
@@ -104,7 +107,13 @@ fn cton_util() -> CommandResult {
    } else {
        // Debugging / shouldn't happen with proper command line handling above.
        Err(format!("Unhandled args: {:?}", args))
+    };
+
+    if args.flag_time_passes {
+        print!("{}", timing::take_current());
    }
+
+    result
 }

 fn main() {
--- a/cranelift/src/filetest/concurrent.rs
+++ b/cranelift/src/filetest/concurrent.rs
@@ -3,6 +3,7 @@
 //! This module provides the `ConcurrentRunner` struct which uses a pool of threads to run tests
 //! concurrently.

+use cretonne::timing;
 use std::panic::catch_unwind;
 use std::path::{Path, PathBuf};
 use std::sync::mpsc::{channel, Sender, Receiver};
@@ -33,7 +34,7 @@ pub struct ConcurrentRunner {
    // Workers have their own `Sender`.
    reply_rx: Receiver<Reply>,

-    handles: Vec<thread::JoinHandle<()>>,
+    handles: Vec<thread::JoinHandle<timing::PassTimes>>,
 }

 impl ConcurrentRunner {
@@ -64,11 +65,13 @@ impl ConcurrentRunner {
    }

    /// Join all the worker threads.
+    /// Transfer pass timings from the worker threads to the current thread.
    pub fn join(&mut self) {
        assert!(self.request_tx.is_none(), "must shutdown before join");
        for h in self.handles.drain(..) {
-            if let Err(e) = h.join() {
-                println!("worker panicked: {:?}", e);
+            match h.join() {
+                Ok(t) => timing::add_to_current(t),
+                Err(e) => println!("worker panicked: {:?}", e),
            }
        }
    }
@@ -109,7 +112,7 @@ fn worker_thread(
    thread_num: usize,
    requests: Arc<Mutex<Receiver<Request>>>,
    replies: Sender<Reply>,
-) -> thread::JoinHandle<()> {
+) -> thread::JoinHandle<timing::PassTimes> {
    thread::Builder::new()
        .name(format!("worker #{}", thread_num))
        .spawn(move || {
@@ -142,6 +145,10 @@ fn worker_thread(

                replies.send(Reply::Done { jobid, result }).unwrap();
            }
+
+            // Timing is accumulated independently per thread.
+            // Timings from this worker thread will be aggregated by `ConcurrentRunner::join()`.
+            timing::take_current()
        })
        .unwrap()
 }
--- a/cranelift/src/filetest/runone.rs
+++ b/cranelift/src/filetest/runone.rs
@@ -6,6 +6,7 @@ use std::time;
 use cretonne::ir::Function;
 use cretonne::isa::TargetIsa;
 use cretonne::settings::Flags;
+use cretonne::timing;
 use cretonne::verify_function;
 use cton_reader::parse_test;
 use cton_reader::IsaSpec;
@@ -17,6 +18,7 @@ use filetest::subtest::{SubTest, Context, Result};
 ///
 /// If running this test causes a panic, it will propagate as normal.
 pub fn run(path: &Path) -> TestResult {
+    let _tt = timing::process_file();
    dbg!("---\nFile: {}", path.to_string_lossy());
    let started = time::Instant::now();
    let buffer = read_to_string(path).map_err(|e| e.to_string())?;
@@ -71,7 +73,6 @@ pub fn run(path: &Path) -> TestResult {
    }


-    // TODO: Actually run the tests.
    Ok(started.elapsed())
 }

--- a/lib/cretonne/src/context.rs
+++ b/lib/cretonne/src/context.rs
@@ -23,6 +23,7 @@ use unreachable_code::eliminate_unreachable_code;
 use verifier;
 use simple_gvn::do_simple_gvn;
 use licm::do_licm;
+use timing;

 /// Persistent data structures and compilation pipeline.
 pub struct Context {
@@ -74,6 +75,7 @@ impl Context {
    ///
    /// Returns the size of the function's code.
    pub fn compile(&mut self, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
+        let _tt = timing::compile();
        self.verify_if(isa)?;

        self.compute_cfg();
@@ -100,6 +102,7 @@ impl Context {
    ///
    /// The machine code is not relocated. Instead, any relocations are emitted into `relocs`.
    pub fn emit_to_memory(&self, mem: *mut u8, relocs: &mut RelocSink, isa: &TargetIsa) {
+        let _tt = timing::binemit();
        isa.emit_function(&self.func, &mut MemoryCodeSink::new(mem, relocs));
    }

--- a/lib/cretonne/src/dominator_tree.rs
+++ b/lib/cretonne/src/dominator_tree.rs
@@ -5,6 +5,7 @@ use flowgraph::{ControlFlowGraph, BasicBlock};
 use ir::{Ebb, Inst, Function, Layout, ProgramOrder, ExpandedProgramPoint};
 use ir::instructions::BranchInfo;
 use packed_option::PackedOption;
+use timing;

 use std::cmp::Ordering;

@@ -227,6 +228,7 @@ impl DominatorTree {

    /// Reset and compute a CFG post-order and dominator tree.
    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
+        let _tt = timing::domtree();
        debug_assert!(cfg.is_valid());
        self.compute_postorder(func);
        self.compute_domtree(func, cfg);
--- a/lib/cretonne/src/flowgraph.rs
+++ b/lib/cretonne/src/flowgraph.rs
@@ -28,13 +28,14 @@ use ir::{Function, Inst, Ebb};
 use ir::instructions::BranchInfo;
 use entity::EntityMap;
 use std::mem;
+use timing;

 /// A basic block denoted by its enclosing Ebb and last instruction.
 pub type BasicBlock = (Ebb, Inst);

 /// A container for the successors and predecessors of some Ebb.
 #[derive(Clone, Default)]
-pub struct CFGNode {
+struct CFGNode {
    /// Instructions that can branch or jump to this EBB.
    ///
    /// This maps branch instruction -> predecessor EBB which is redundant since the EBB containing
@@ -94,6 +95,7 @@ impl ControlFlowGraph {
    ///
    /// This will clear and overwrite any information already stored in this data structure.
    pub fn compute(&mut self, func: &Function) {
+        let _tt = timing::flowgraph();
        self.clear();
        self.data.resize(func.dfg.num_ebbs());

--- a/lib/cretonne/src/isa/intel/mod.rs
+++ b/lib/cretonne/src/isa/intel/mod.rs
@@ -14,6 +14,7 @@ use isa::{TargetIsa, RegInfo, RegClass, EncInfo};
 use ir;
 use regalloc;
 use result;
+use timing;


 #[allow(dead_code)]
@@ -115,6 +116,7 @@ impl TargetIsa for Isa {
    }

    fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult {
+        let _tt = timing::prologue_epilogue();
        abi::prologue_epilogue(func, self)
    }
 }
--- a/lib/cretonne/src/isa/mod.rs
+++ b/lib/cretonne/src/isa/mod.rs
@@ -51,6 +51,7 @@ use settings;
 use ir;
 use regalloc;
 use result;
+use timing;
 use isa::enc_tables::Encodings;

 #[cfg(build_riscv)]
@@ -236,6 +237,7 @@ pub trait TargetIsa {
    ///
    /// Return an error if the stack frame is too large.
    fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult {
+        let _tt = timing::prologue_epilogue();
        // This default implementation is unlikely to be good enough.
        use stack_layout::layout_stack;
        use ir::stackslot::{StackSize, StackOffset};
--- a/lib/cretonne/src/legalizer/mod.rs
+++ b/lib/cretonne/src/legalizer/mod.rs
@@ -18,6 +18,7 @@ use flowgraph::ControlFlowGraph;
 use ir::{self, InstBuilder};
 use isa::TargetIsa;
 use bitset::BitSet;
+use timing;

 mod boundary;
 mod globalvar;
@@ -33,6 +34,7 @@ use self::heap::expand_heap_addr;
 /// - Fill out `func.encodings`.
 ///
 pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &TargetIsa) {
+    let _tt = timing::legalize();
    debug_assert!(cfg.is_valid());

    boundary::legalize_signatures(func, isa);
--- a/lib/cretonne/src/lib.rs
+++ b/lib/cretonne/src/lib.rs
@@ -28,6 +28,7 @@ pub mod packed_option;
 pub mod regalloc;
 pub mod result;
 pub mod settings;
+pub mod timing;
 pub mod verifier;

 mod abi;
--- a/lib/cretonne/src/licm.rs
+++ b/lib/cretonne/src/licm.rs
@@ -7,6 +7,7 @@ use std::collections::HashSet;
 use dominator_tree::DominatorTree;
 use entity::{EntityList, ListPool};
 use loop_analysis::{Loop, LoopAnalysis};
+use timing;

 /// Performs the LICM pass by detecting loops within the CFG and moving
 /// loop-invariant instructions out of them.
@@ -17,6 +18,7 @@ pub fn do_licm(
    domtree: &mut DominatorTree,
    loop_analysis: &mut LoopAnalysis,
 ) {
+    let _tt = timing::licm();
    debug_assert!(cfg.is_valid());
    debug_assert!(domtree.is_valid());
    debug_assert!(loop_analysis.is_valid());
--- a/lib/cretonne/src/loop_analysis.rs
+++ b/lib/cretonne/src/loop_analysis.rs
@@ -7,6 +7,7 @@ use entity::EntityMap;
 use flowgraph::ControlFlowGraph;
 use ir::{Function, Ebb, Layout};
 use packed_option::PackedOption;
+use timing;

 /// A opaque reference to a code loop.
 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
@@ -98,6 +99,7 @@ impl LoopAnalysis {
 impl LoopAnalysis {
    /// Detects the loops in a function. Needs the control flow graph and the dominator tree.
    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph, domtree: &DominatorTree) {
+        let _tt = timing::loop_analysis();
        self.loops.clear();
        self.ebb_loop_map.clear();
        self.ebb_loop_map.resize(func.dfg.num_ebbs());
--- a/lib/cretonne/src/regalloc/coalescing.rs
+++ b/lib/cretonne/src/regalloc/coalescing.rs
@@ -18,6 +18,7 @@ use std::cmp::Ordering;
 use std::iter::Peekable;
 use std::mem;
 use isa::{TargetIsa, EncInfo};
+use timing;

 /// Dominator forest.
 ///
@@ -282,6 +283,7 @@ impl Coalescing {
        liveness: &mut Liveness,
        virtregs: &mut VirtRegs,
    ) {
+        let _tt = timing::ra_cssa();
        dbg!("Coalescing for:\n{}", func.display(isa));
        let mut context = Context {
            isa,
--- a/lib/cretonne/src/regalloc/coloring.rs
+++ b/lib/cretonne/src/regalloc/coloring.rs
@@ -57,6 +57,7 @@ use regalloc::liveness::Liveness;
 use regalloc::liverange::{LiveRange, LiveRangeContext};
 use regalloc::solver::{Solver, SolverError};
 use std::mem;
+use timing;


 /// Data structures for the coloring pass.
@@ -123,6 +124,7 @@ impl Coloring {
        liveness: &mut Liveness,
        tracker: &mut LiveValueTracker,
    ) {
+        let _tt = timing::ra_coloring();
        dbg!("Coloring for:\n{}", func.display(isa));
        let mut ctx = Context {
            usable_regs: isa.allocatable_registers(func),
--- a/lib/cretonne/src/regalloc/context.rs
+++ b/lib/cretonne/src/regalloc/context.rs
@@ -16,6 +16,7 @@ use regalloc::reload::Reload;
 use regalloc::spilling::Spilling;
 use regalloc::virtregs::VirtRegs;
 use result::CtonResult;
+use timing;
 use topo_order::TopoOrder;
 use verifier::{verify_context, verify_liveness, verify_cssa, verify_locations};

@@ -72,6 +73,7 @@ impl Context {
        cfg: &ControlFlowGraph,
        domtree: &mut DominatorTree,
    ) -> CtonResult {
+        let _tt = timing::regalloc();
        debug_assert!(domtree.is_valid());

        // `Liveness` and `Coloring` are self-clearing.
--- a/lib/cretonne/src/regalloc/liveness.rs
+++ b/lib/cretonne/src/regalloc/liveness.rs
@@ -184,6 +184,7 @@ use regalloc::affinity::Affinity;
 use regalloc::liverange::{LiveRange, LiveRangeForest, LiveRangeContext};
 use std::mem;
 use std::ops::Index;
+use timing;

 /// A set of live ranges, indexed by value number.
 type LiveRangeSet = SparseMap<Value, LiveRange>;
@@ -385,6 +386,7 @@ impl Liveness {
    /// Compute the live ranges of all SSA values used in `func`.
    /// This clears out any existing analysis stored in this data structure.
    pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
+        let _tt = timing::ra_liveness();
        self.ranges.clear();

        // Get ISA data structures used for computing live range affinities.
--- a/lib/cretonne/src/regalloc/reload.rs
+++ b/lib/cretonne/src/regalloc/reload.rs
@@ -19,6 +19,7 @@ use isa::{TargetIsa, Encoding, EncInfo, RecipeConstraints, ConstraintKind};
 use regalloc::affinity::Affinity;
 use regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
 use regalloc::liveness::Liveness;
+use timing;
 use topo_order::TopoOrder;

 /// Reusable data structures for the reload pass.
@@ -69,6 +70,7 @@ impl Reload {
        topo: &mut TopoOrder,
        tracker: &mut LiveValueTracker,
    ) {
+        let _tt = timing::ra_reload();
        dbg!("Reload for:\n{}", func.display(isa));
        let mut ctx = Context {
            cur: EncCursor::new(func, isa),
--- a/lib/cretonne/src/regalloc/spilling.rs
+++ b/lib/cretonne/src/regalloc/spilling.rs
@@ -26,6 +26,7 @@ use regalloc::liveness::Liveness;
 use regalloc::pressure::Pressure;
 use regalloc::virtregs::VirtRegs;
 use std::fmt;
+use timing;
 use topo_order::TopoOrder;

 /// Persistent data structures for the spilling pass.
@@ -87,6 +88,7 @@ impl Spilling {
        topo: &mut TopoOrder,
        tracker: &mut LiveValueTracker,
    ) {
+        let _tt = timing::ra_spilling();
        dbg!("Spilling for:\n{}", func.display(isa));
        let reginfo = isa.register_info();
        let usable_regs = isa.allocatable_registers(func);
--- a/lib/cretonne/src/simple_gvn.rs
+++ b/lib/cretonne/src/simple_gvn.rs
@@ -5,6 +5,7 @@ use flowgraph::ControlFlowGraph;
 use dominator_tree::DominatorTree;
 use ir::{InstructionData, Function, Inst, Opcode, Type};
 use scoped_hash_map::ScopedHashMap;
+use timing;

 /// Test whether the given opcode is unsafe to even consider for GVN.
 fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool {
@@ -16,6 +17,7 @@ fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool {
 /// Perform simple GVN on `func`.
 ///
 pub fn do_simple_gvn(func: &mut Function, cfg: &mut ControlFlowGraph, domtree: &mut DominatorTree) {
+    let _tt = timing::gvn();
    debug_assert!(cfg.is_valid());
    debug_assert!(domtree.is_valid());

--- a/lib/cretonne/src/timing.rs
+++ b/lib/cretonne/src/timing.rs
@@ -0,0 +1,225 @@
+//! Pass timing.
+//!
+//! This modules provides facilities for timing the execution of individual compilation passes.
+
+use std::fmt;
+
+pub use self::details::{TimingToken, PassTimes, take_current, add_to_current};
+
+// Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a
+// snake_case name and a plain text description used when printing out the timing report.
+//
+// This macro defines:
+//
+// - A C-style enum containing all the pass names and a `NoPass` variant.
+// - A usize constant with the number of defined passes.
+// - A const array of pass descriptions.
+// - A public function per pass used to start the timing of that pass.
+macro_rules! define_passes {
+    { $enum:ident, $num_passes:ident, $descriptions:ident;
+      $($pass:ident: $desc:expr,)+
+    } => {
+        #[allow(non_camel_case_types)]
+        #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+        enum $enum { $($pass,)+ NoPass }
+
+        const $num_passes: usize = $enum::NoPass as usize;
+
+        const $descriptions: [&str; $num_passes] = [ $($desc),+ ];
+
+        $(
+            #[doc=$desc]
+            pub fn $pass() -> TimingToken {
+                details::start_pass($enum::$pass)
+            }
+        )+
+    }
+}
+
+// Pass definitions.
+define_passes!{
+    Pass, NUM_PASSES, DESCRIPTIONS;
+
+    process_file: "Processing test file",
+    parse_text: "Parsing textual Cretonne IL",
+    wasm_translate_module: "Translate WASM module",
+    wasm_translate_function: "Translate WASM function",
+
+    verifier: "Verify Cretonne IL",
+    verify_cssa: "Verify CSSA",
+    verify_liveness: "Verify live ranges",
+    verify_locations: "Verify value locations",
+    verify_flags: "Verify CPU flags",
+
+    compile: "Compilation passes",
+    flowgraph: "Control flow graph",
+    domtree: "Dominator tree",
+    loop_analysis: "Loop analysis",
+    legalize: "Legalization",
+    gvn: "Global value numbering",
+    licm: "Loop invariant code motion",
+    unreachable_code: "Remove unreachable blocks",
+
+    regalloc: "Register allocation",
+    ra_liveness: "RA liveness analysis",
+    ra_cssa: "RA coalescing CSSA",
+    ra_spilling: "RA spilling",
+    ra_reload: "RA reloading",
+    ra_coloring: "RA coloring",
+
+    prologue_epilogue: "Prologue/epilogue insertion",
+    binemit: "Binary machine code emission",
+}
+
+impl Pass {
+    pub fn idx(self) -> usize {
+        self as usize
+    }
+}
+
+impl fmt::Display for Pass {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match DESCRIPTIONS.get(self.idx()) {
+            Some(s) => f.write_str(s),
+            None => f.write_str("<no pass>"),
+        }
+    }
+}
+
+
+/// Implementation details.
+///
+/// This whole module can be gated on a `cfg` feature to provide a dummy implementation for
+/// performance-sensitive builds or restricted environments. The dummy implementation must provide
+/// `TimingToken` and `PassTimings` types and a `take_current` function.
+mod details {
+    use super::{Pass, NUM_PASSES, DESCRIPTIONS};
+    use std::cell::{Cell, RefCell};
+    use std::fmt;
+    use std::mem;
+    use std::time::{Instant, Duration};
+
+    /// A timing token is responsible for timing the currently running pass. Timing starts when it
+    /// is created and ends when it is dropped.
+    ///
+    /// Multiple passes can be active at the same time, but they must be started and stopped in a
+    /// LIFO fashion.
+    pub struct TimingToken {
+        /// Start time for this pass.
+        start: Instant,
+
+        // Pass being timed by this token.
+        pass: Pass,
+
+        // The previously active pass which will be restored when this token is dropped.
+        prev: Pass,
+    }
+
+    /// Accumulated timing information for a single pass.
+    #[derive(Default)]
+    struct PassTime {
+        /// Total time spent running this pas including children.
+        total: Duration,
+
+        /// Time spent running in child passes.
+        child: Duration,
+    }
+
+    /// Accumulated timing for all passes.
+    #[derive(Default)]
+    pub struct PassTimes {
+        pass: [PassTime; NUM_PASSES],
+    }
+
+    impl fmt::Display for PassTimes {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            writeln!(f, "======= =======  ==================================")?;
+            writeln!(f, "  Total    Self  Pass")?;
+            writeln!(f, "------- -------  ----------------------------------")?;
+            for (time, desc) in self.pass.iter().zip(&DESCRIPTIONS) {
+                // Omit passes that haven't run.
+                if time.total == Duration::default() {
+                    continue;
+                }
+
+                // Write a duration as secs.milis, trailing space.
+                fn fmtdur(mut dur: Duration, f: &mut fmt::Formatter) -> fmt::Result {
+                    // Round to nearest ms by adding 500us.
+                    dur += Duration::new(0, 500_000);
+                    let ms = dur.subsec_nanos() / 1_000_000;
+                    write!(f, "{:3}.{:03} ", dur.as_secs(), ms)
+                }
+
+                fmtdur(time.total, f)?;
+                if let Some(s) = time.total.checked_sub(time.child) {
+                    fmtdur(s, f)?;
+                }
+                writeln!(f, " {}", desc)?;
+            }
+            writeln!(f, "======= =======  ==================================")
+        }
+    }
+
+    /// Information about passes in a single thread.
+    thread_local!{
+        static CURRENT_PASS: Cell<Pass> = Cell::new(Pass::NoPass);
+        static PASS_TIME: RefCell<PassTimes> = RefCell::new(Default::default());
+    }
+
+    /// Start timing `pass` as a child of the currently running pass, if any.
+    ///
+    /// This function is called by the publicly exposed pass functions.
+    pub(super) fn start_pass(pass: Pass) -> TimingToken {
+        let prev = CURRENT_PASS.with(|p| p.replace(pass));
+        dbg!("timing: Starting {}, (during {})", pass, prev);
+        TimingToken {
+            start: Instant::now(),
+            pass,
+            prev,
+        }
+    }
+
+    /// Dropping a timing token indicated the end of the pass.
+    impl Drop for TimingToken {
+        fn drop(&mut self) {
+            let duration = self.start.elapsed();
+            dbg!("timing: Ending {}", self.pass);
+            let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev));
+            assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order");
+            PASS_TIME.with(|rc| {
+                let mut table = rc.borrow_mut();
+                table.pass[self.pass.idx()].total += duration;
+                if let Some(parent) = table.pass.get_mut(self.prev.idx()) {
+                    parent.child += duration;
+                }
+            })
+        }
+    }
+
+    /// Take the current accumulated pass timings and reset the timings for the current thread.
+    pub fn take_current() -> PassTimes {
+        PASS_TIME.with(|rc| mem::replace(&mut *rc.borrow_mut(), Default::default()))
+    }
+
+    /// Add `timings` to the accumulated timings for the current thread.
+    pub fn add_to_current(times: PassTimes) {
+        PASS_TIME.with(|rc| for (a, b) in rc.borrow_mut().pass.iter_mut().zip(
+            &times.pass,
+        )
+        {
+            a.total += b.total;
+            a.child += b.child;
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn display() {
+        assert_eq!(Pass::NoPass.to_string(), "<no pass>");
+        assert_eq!(Pass::regalloc.to_string(), "Register allocation");
+    }
+}
--- a/lib/cretonne/src/unreachable_code.rs
+++ b/lib/cretonne/src/unreachable_code.rs
@@ -4,6 +4,7 @@ use cursor::{Cursor, FuncCursor};
 use dominator_tree::DominatorTree;
 use flowgraph::ControlFlowGraph;
 use ir;
+use timing;

 /// Eliminate unreachable code.
 ///
@@ -16,6 +17,7 @@ pub fn eliminate_unreachable_code(
    cfg: &mut ControlFlowGraph,
    domtree: &DominatorTree,
 ) {
+    let _tt = timing::unreachable_code();
    let mut pos = FuncCursor::new(func);
    while let Some(ebb) = pos.next_ebb() {
        if domtree.is_reachable(ebb) {
--- a/lib/cretonne/src/verifier/cssa.rs
+++ b/lib/cretonne/src/verifier/cssa.rs
@@ -6,6 +6,7 @@ use ir::Function;
 use regalloc::liveness::Liveness;
 use regalloc::virtregs::VirtRegs;
 use std::cmp::Ordering;
+use timing;
 use verifier::Result;

 /// Verify conventional SSA form for `func`.
@@ -29,6 +30,7 @@ pub fn verify_cssa(
    liveness: &Liveness,
    virtregs: &VirtRegs,
 ) -> Result {
+    let _tt = timing::verify_cssa();
    let verifier = CssaVerifier {
        func,
        cfg,
--- a/lib/cretonne/src/verifier/flags.rs
+++ b/lib/cretonne/src/verifier/flags.rs
@@ -8,6 +8,7 @@ use isa;
 use packed_option::PackedOption;
 use std::result;
 use verifier::{Result, Error};
+use timing;

 /// Verify that CPU flags are used correctly.
 ///
@@ -26,6 +27,7 @@ pub fn verify_flags(
    cfg: &ControlFlowGraph,
    isa: Option<&isa::TargetIsa>,
 ) -> Result {
+    let _tt = timing::verify_flags();
    let mut verifier = FlagsVerifier {
        func,
        cfg,
--- a/lib/cretonne/src/verifier/liveness.rs
+++ b/lib/cretonne/src/verifier/liveness.rs
@@ -8,6 +8,7 @@ use regalloc::liveness::Liveness;
 use regalloc::liverange::LiveRange;
 use std::cmp::Ordering;
 use verifier::Result;
+use timing;

 /// Verify liveness information for `func`.
 ///
@@ -27,6 +28,7 @@ pub fn verify_liveness(
    cfg: &ControlFlowGraph,
    liveness: &Liveness,
 ) -> Result {
+    let _tt = timing::verify_liveness();
    let verifier = LivenessVerifier {
        isa,
        func,
--- a/lib/cretonne/src/verifier/locations.rs
+++ b/lib/cretonne/src/verifier/locations.rs
@@ -5,6 +5,7 @@ use isa;
 use regalloc::RegDiversions;
 use regalloc::liveness::Liveness;
 use verifier::Result;
+use timing;

 /// Verify value locations for `func`.
 ///
@@ -22,6 +23,7 @@ pub fn verify_locations(
    func: &ir::Function,
    liveness: Option<&Liveness>,
 ) -> Result {
+    let _tt = timing::verify_locations();
    let verifier = LocationVerifier {
        isa,
        func,
--- a/lib/cretonne/src/verifier/mod.rs
+++ b/lib/cretonne/src/verifier/mod.rs
@@ -73,6 +73,7 @@ use std::collections::BTreeSet;
 use std::error as std_error;
 use std::fmt::{self, Display, Formatter, Write};
 use std::result;
+use timing;

 pub use self::cssa::verify_cssa;
 pub use self::liveness::verify_liveness;
@@ -126,6 +127,7 @@ pub type Result = result::Result<(), Error>;

 /// Verify `func`.
 pub fn verify_function<'a, FOI: Into<FlagsOrIsa<'a>>>(func: &Function, fisa: FOI) -> Result {
+    let _tt = timing::verifier();
    Verifier::new(func, fisa.into()).run()
 }

@@ -137,6 +139,7 @@ pub fn verify_context<'a, FOI: Into<FlagsOrIsa<'a>>>(
    domtree: &DominatorTree,
    fisa: FOI,
 ) -> Result {
+    let _tt = timing::verifier();
    let verifier = Verifier::new(func, fisa.into());
    if cfg.is_valid() {
        verifier.cfg_integrity(cfg)?;
--- a/lib/reader/src/parser.rs
+++ b/lib/reader/src/parser.rs
@@ -14,7 +14,7 @@ use cretonne::ir::immediates::{Imm64, Uimm32, Offset32, Ieee32, Ieee64};
 use cretonne::ir::entities::AnyEntity;
 use cretonne::ir::instructions::{InstructionFormat, InstructionData, VariableArgs};
 use cretonne::isa::{self, TargetIsa, Encoding, RegUnit};
-use cretonne::settings;
+use cretonne::{settings, timing};
 use testfile::{TestFile, Details, Comment};
 use error::{Location, Error, Result};
 use lexer::{self, Lexer, Token};
@@ -26,6 +26,7 @@ use sourcemap::{SourceMap, MutableSourceMap};
 ///
 /// Any test commands or ISA declarations are ignored.
 pub fn parse_functions(text: &str) -> Result<Vec<Function>> {
+    let _tt = timing::parse_text();
    parse_test(text).map(|file| {
        file.functions.into_iter().map(|(func, _)| func).collect()
    })
@@ -35,6 +36,7 @@ pub fn parse_functions(text: &str) -> Result<Vec<Function>> {
 ///
 /// The returned `TestFile` contains direct references to substrings of `text`.
 pub fn parse_test<'a>(text: &'a str) -> Result<TestFile<'a>> {
+    let _tt = timing::parse_text();
    let mut parser = Parser::new(text);
    // Gather the preamble comments as 'Function'.
    parser.gather_comments(AnyEntity::Function);
--- a/lib/wasm/src/func_translator.rs
+++ b/lib/wasm/src/func_translator.rs
@@ -8,6 +8,7 @@ use code_translator::translate_operator;
 use cretonne::entity::EntityRef;
 use cretonne::ir::{self, InstBuilder, Ebb};
 use cretonne::result::{CtonResult, CtonError};
+use cretonne::timing;
 use cton_frontend::{ILBuilder, FunctionBuilder};
 use environ::FuncEnvironment;
 use state::TranslationState;
@@ -66,6 +67,7 @@ impl FuncTranslator {
        func: &mut ir::Function,
        environ: &mut FE,
    ) -> CtonResult {
+        let _tt = timing::wasm_translate_function();
        dbg!(
            "translate({} bytes, {}{})",
            reader.bytes_remaining(),
--- a/lib/wasm/src/module_translator.rs
+++ b/lib/wasm/src/module_translator.rs
@@ -1,5 +1,6 @@
 //! Translation skeletton that traverses the whole WebAssembly module and call helper functions
 //! to deal with each part of it.
+use cretonne::timing;
 use wasmparser::{ParserState, SectionCode, ParserInput, Parser, WasmDecoder, BinaryReaderError};
 use sections_translator::{SectionParsingError, parse_function_signatures, parse_import_section,
                          parse_function_section, parse_export_section, parse_start_section,
@@ -15,6 +16,7 @@ pub fn translate_module<'data>(
    data: &'data [u8],
    environ: &mut ModuleEnvironment<'data>,
 ) -> Result<(), String> {
+    let _tt = timing::wasm_translate_module();
    let mut parser = Parser::new(data);
    match *parser.read() {
        ParserState::BeginWasm { .. } => {}