Improve coalescing performance by using a FxHashMap (#340)

* Use FxHashMap instead of HashMap for better performance
* Replace the binary search in the coalescing pass with an FxHashMap

This speeds up coalescing by up to 16% and overall compilation by 9%.
2018-05-18 23:40:08 +01:00
parent 80fdfb2376
commit e9d362d902
6 changed files with 130 additions and 26 deletions
--- a/lib/codegen/src/fx.rs
+++ b/lib/codegen/src/fx.rs
@@ -0,0 +1,111 @@
 // This file is taken from the Rust compiler: src/librustc_data_structures/fx.rs
 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 use std::collections::{HashMap, HashSet};
 use std::default::Default;
 use std::hash::{BuildHasherDefault, Hash, Hasher};
 use std::ops::BitXor;
 /// A `HashMap` whose hasher is built from `FxHasher` instead of the standard
 /// library's default hasher.
 pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
 /// A `HashSet` whose hasher is built from `FxHasher` instead of the standard
 /// library's default hasher.
 pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;
 /// Creates an empty `FxHashMap`.
 ///
 /// The function deliberately shares its name with the type alias so that call
 /// sites can write `FxHashMap()`; that is why the snake-case lint is silenced.
 #[allow(non_snake_case)]
 pub fn FxHashMap<K, V>() -> FxHashMap<K, V>
 where
    K: Hash + Eq,
 {
    HashMap::with_hasher(BuildHasherDefault::default())
 }
 /// Creates an empty `FxHashSet`.
 ///
 /// The function deliberately shares its name with the type alias so that call
 /// sites can write `FxHashSet()`; that is why the snake-case lint is silenced.
 #[allow(non_snake_case)]
 pub fn FxHashSet<V>() -> FxHashSet<V>
 where
    V: Hash + Eq,
 {
    HashSet::with_hasher(BuildHasherDefault::default())
 }
 /// A fast, non-cryptographic hasher, taken from rustc.
 ///
 /// The standard library's default hasher is SipHash, which is slower than we
 /// would like. Denial-of-service resistance is not a concern inside the
 /// compiler, so a cheaper mixing function is used instead.
 ///
 /// The algorithm is the one used by Firefox (a homespun scheme rather than a
 /// widely-known one), adapted to keep a pointer-sized state. Every mixing step
 /// rotates the state left by five bits, xors in one input word and multiplies
 /// by the constant `K` with wrapping arithmetic. The integer `write_*` methods
 /// fold their argument in with a single step (two for `write_u64` on 32-bit
 /// targets), whereas `write` on a byte slice performs one step per byte.
 pub struct FxHasher {
    hash: usize,
 }
 /// Multiplier applied in every mixing step (value used on 32-bit targets).
 #[cfg(target_pointer_width = "32")]
 const K: usize = 0x9e3779b9;
 /// Multiplier applied in every mixing step (value used on 64-bit targets).
 #[cfg(target_pointer_width = "64")]
 const K: usize = 0x517cc1b727220a95;
 impl Default for FxHasher {
    /// Returns a hasher whose state is zero.
    #[inline]
    fn default() -> FxHasher {
        Self { hash: 0 }
    }
 }
 impl FxHasher {
    /// Folds one word into the running state: rotate left by 5, xor with
    /// `i`, then multiply by `K` (wrapping on overflow).
    #[inline]
    fn add_to_hash(&mut self, i: usize) {
        let rotated = self.hash.rotate_left(5);
        let mixed = rotated.bitxor(i);
        self.hash = mixed.wrapping_mul(K);
    }
 }
 impl Hasher for FxHasher {
    /// Mixes in `bytes` one byte at a time, in slice order.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        for &byte in bytes {
            self.add_to_hash(byte as usize);
        }
    }
    /// Mixes in `i`, widened to `usize`, with a single step.
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(i as usize);
    }
    /// Mixes in `i`, widened to `usize`, with a single step.
    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(i as usize);
    }
    /// Mixes in `i`, converted to `usize`, with a single step.
    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i as usize);
    }
    /// Mixes in a 64-bit value.
    ///
    /// On 64-bit targets the value fits in one `usize` and takes a single step.
    /// On 32-bit targets the low 32 bits are mixed in first, followed by the
    /// high 32 bits.
    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.add_to_hash(i as usize);
        // `cfg!` is a compile-time constant, so the extra step only exists on
        // 32-bit builds.
        if cfg!(target_pointer_width = "32") {
            self.add_to_hash((i >> 32) as usize);
        }
    }
    /// Mixes in `i` with a single step.
    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.add_to_hash(i);
    }
    /// Returns the current state as a `u64`; the hasher itself is not modified.
    #[inline]
    fn finish(&self) -> u64 {
        self.hash as u64
    }
 }
--- a/lib/codegen/src/lib.rs
+++ b/lib/codegen/src/lib.rs
@@ -95,6 +95,7 @@ mod constant_hash;
 mod context;
 mod dce;
 mod divconst_magic_numbers;
 mod fx;
 mod iterators;
 mod legalizer;
 mod licm;
--- a/lib/codegen/src/licm.rs
+++ b/lib/codegen/src/licm.rs
@@ -4,9 +4,9 @@ use cursor::{Cursor, FuncCursor};
 use dominator_tree::DominatorTree;
 use entity::{EntityList, ListPool};
 use flowgraph::ControlFlowGraph;
 use fx::FxHashSet;
 use ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
 use loop_analysis::{Loop, LoopAnalysis};
 use std::collections::HashSet;
 use std::vec::Vec;
 use timing;
@@ -138,7 +138,7 @@ fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
 }
 /// Test whether the given instruction is loop-invariant.
-fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &HashSet<Value>) -> bool {
+fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
    if trivially_unsafe_for_licm(dfg[inst].opcode()) {
        return false;
    }
@@ -162,7 +162,7 @@ fn remove_loop_invariant_instructions(
    cfg: &ControlFlowGraph,
    loop_analysis: &LoopAnalysis,
 ) -> Vec<Inst> {
-    let mut loop_values: HashSet<Value> = HashSet::new();
+    let mut loop_values: FxHashSet<Value> = FxHashSet();
    let mut invariant_insts: Vec<Inst> = Vec::new();
    let mut pos = FuncCursor::new(func);
    // We traverse the loop EBB in reverse post-order.
@@ -194,8 +194,8 @@ fn remove_loop_invariant_instructions(
 /// Return ebbs from a loop in post-order, starting from an entry point in the block.
 fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis, cfg: &ControlFlowGraph, lp: Loop) -> Vec<Ebb> {
-    let mut grey = HashSet::new();
+    let mut grey = FxHashSet();
-    let mut black = HashSet::new();
+    let mut black = FxHashSet();
    let mut stack = vec![loop_analysis.loop_header(lp)];
    let mut postorder = Vec::new();
--- a/lib/codegen/src/regalloc/coalescing.rs
+++ b/lib/codegen/src/regalloc/coalescing.rs
@@ -10,6 +10,7 @@ use cursor::{Cursor, EncCursor};
 use dbg::DisplayList;
 use dominator_tree::{DominatorTree, DominatorTreePreorder};
 use flowgraph::ControlFlowGraph;
 use fx::FxHashMap;
 use ir::{self, InstBuilder, ProgramOrder};
 use ir::{Ebb, ExpandedProgramPoint, Function, Inst, Value};
 use isa::{EncInfo, TargetIsa};
@@ -883,11 +884,9 @@ struct VirtualCopies {
    // Filter for the currently active node iterator.
    //
-    // An (ebb, set_id, num) entry means that branches to `ebb` are active in `set_id` with branch
+    // An ebb => (set_id, num) entry means that branches to `ebb` are active in `set_id` with branch
    // argument number `num`.
-    //
+    filter: FxHashMap<Ebb, (u8, usize)>,
    // This is ordered by EBB number for fast binary search.
    filter: Vec<(Ebb, u8, usize)>,
 }
 impl VirtualCopies {
@@ -896,7 +895,7 @@ impl VirtualCopies {
        Self {
            params: Vec::new(),
            branches: Vec::new(),
-            filter: Vec::new(),
+            filter: FxHashMap(),
        }
    }
@@ -1010,12 +1009,10 @@ impl VirtualCopies {
                        // Stop once we're outside the bounds of `self.params`.
                        break;
                    }
-                    self.filter.push((ebb, set_id, num));
+                    self.filter.insert(ebb, (set_id, num));
                }
            }
        }
        // We'll be using `binary_search_by` with the numerical EBB ordering.
        self.filter.sort_unstable();
    }
    /// Look up the set_id and argument number for `ebb` in the current filter.
@@ -1023,13 +1020,7 @@ impl VirtualCopies {
    /// Returns `None` if none of the currently active parameters are defined at `ebb`. Otherwise
    /// returns `(set_id, argnum)` for an active parameter defined at `ebb`.
    fn lookup(&self, ebb: Ebb) -> Option<(u8, usize)> {
-        self.filter
+        self.filter.get(&ebb).map(|t| *t)
            .binary_search_by(|&(e, _, _)| e.cmp(&ebb))
            .ok()
            .map(|i| {
                let t = self.filter[i];
                (t.1, t.2)
            })
    }
    /// Get an iterator of dom-forest nodes corresponding to the current filter.
--- a/lib/codegen/src/regalloc/live_value_tracker.rs
+++ b/lib/codegen/src/regalloc/live_value_tracker.rs
@@ -6,12 +6,12 @@
 use dominator_tree::DominatorTree;
 use entity::{EntityList, ListPool};
 use fx::FxHashMap;
 use ir::{DataFlowGraph, Ebb, ExpandedProgramPoint, Inst, Layout, Value};
 use partition_slice::partition_slice;
 use regalloc::affinity::Affinity;
 use regalloc::liveness::Liveness;
 use regalloc::liverange::LiveRange;
 use std::collections::HashMap;
 use std::vec::Vec;
 type ValueList = EntityList<Value>;
@@ -25,7 +25,7 @@ pub struct LiveValueTracker {
    /// dominator of an EBB.
    ///
    /// This is the set of values that are live *before* the branch.
-    idom_sets: HashMap<Inst, ValueList>,
+    idom_sets: FxHashMap<Inst, ValueList>,
    /// Memory pool for the live sets.
    idom_pool: ListPool<Value>,
@@ -128,7 +128,7 @@ impl LiveValueTracker {
    pub fn new() -> Self {
        Self {
            live: LiveValueVec::new(),
-            idom_sets: HashMap::new(),
+            idom_sets: FxHashMap(),
            idom_pool: ListPool::new(),
        }
    }
--- a/lib/codegen/src/scoped_hash_map.rs
+++ b/lib/codegen/src/scoped_hash_map.rs
@@ -4,7 +4,8 @@
 //! container that has a concept of scopes that can be entered and exited, such that
 //! values inserted while inside a scope aren't visible outside the scope.
-use std::collections::{hash_map, HashMap};
+use fx::FxHashMap;
 use std::collections::hash_map;
 use std::hash::Hash;
 use std::mem;
@@ -58,7 +59,7 @@ pub enum Entry<'a, K: 'a, V: 'a> {
 /// Shadowing, where one scope has entries with the same keys as a containing scope,
 /// is not supported in this implementation.
 pub struct ScopedHashMap<K, V> {
-    map: HashMap<K, Val<K, V>>,
+    map: FxHashMap<K, Val<K, V>>,
    last_insert: Option<K>,
    current_depth: usize,
 }
@@ -70,7 +71,7 @@ where
    /// Creates an empty `ScopedHashMap`.
    pub fn new() -> Self {
        Self {
-            map: HashMap::new(),
+            map: FxHashMap(),
            last_insert: None,
            current_depth: 0,
        }