Reimplement coalescer following the Budimlic paper.

The old coalescing algorithm had algorithmic complexity issues when dealing with large virtual registers. Reimplement it using a proper union-find algorithm so that only one pass through the dominator forests is needed for virtual registers that are interference-free. Virtual registers that do have interference are split, and new registers are built. This pass is about twice as fast as the old one when dealing with complex virtual registers.
2018-01-09 10:33:02 -08:00
parent 16ac4f65b3
commit d1f236b00a
4 changed files with 604 additions and 485 deletions
--- a/cranelift/filetests/regalloc/coalesce.cton
+++ b/cranelift/filetests/regalloc/coalesce.cton
@@ -40,9 +40,8 @@ ebb1(v10: i32):
 function %dualuse(i32) -> i32 {
 ebb0(v0: i32):
    ; check: $(cp1=$V) = copy $v0
-    ; nextln: brnz $v0, $ebb1($v0, $cp1)
+    ; nextln: brnz $v0, $ebb1($cp1, $v0)
    brnz v0, ebb1(v0, v0)
-    ; not: copy
    v1 = iadd_imm v0, 7
    v2 = iadd_imm v1, 56
    jump ebb1(v1, v2)
@@ -56,14 +55,15 @@ ebb1(v10: i32, v11: i32):
 ; The interference can be broken with a copy at either branch.
 function %interference(i32) -> i32 {
 ebb0(v0: i32):
+    ; check: $(cp0=$V) = copy $v0
    ; not: copy
+    ; check: brnz $v0, ebb1($cp0)
    brnz v0, ebb1(v0)
    v1 = iadd_imm v0, 7
    ; v1 and v0 interfere here:
    v2 = iadd_imm v0, 8
-    ; check: $(cp1=$V) = copy $v1
    ; not: copy
-    ; check: jump $ebb1($cp1)
+    ; check: jump $ebb1($v1)
    jump ebb1(v1)

 ebb1(v10: i32):
@@ -75,7 +75,6 @@ ebb1(v10: i32):
 ; A loop where one induction variable is used as a backedge argument.
 function %fibonacci(i32) -> i32 {
 ebb0(v0: i32):
-    ; not: copy
    v1 = iconst.i32 1
    v2 = iconst.i32 2
    jump ebb1(v1, v2)
@@ -103,8 +102,7 @@ function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32):
    ; check: fill v8
    ; not: v8
-    brnz v0, ebb1(v8)
-    jump ebb1(v7)
+    jump ebb1(v8)

 ebb1(v10: i32):
    v11 = iadd_imm v10, 1
--- a/cranelift/filetests/regalloc/infinite-interference.cton
+++ b/cranelift/filetests/regalloc/infinite-interference.cton
@@ -0,0 +1,37 @@
+test regalloc
+isa riscv
+
+; Here, the coalescer initially builds vreg0 = [v1, v2, v3]
+;
+; There's interference between v1 and v2 at the brz instruction. Isolating v2 is not going to
+; resolve that conflict since v1 will just interfere with the inserted copy too.
+
+;function %c1(i32) -> i32 {
+;ebb0(v0: i32):
+;    v1 = iadd_imm v0, 1
+;    v2 = iconst.i32 1
+;    brz v1, ebb1(v2)
+;    jump ebb2
+;
+;ebb1(v3: i32):
+;    return v3
+;
+;ebb2:
+;    jump ebb1(v1)
+;}
+
+; Same thing with v1 and v2 swapped to reverse the order of definitions.
+
+function %c2(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iadd_imm v0, 1
+    v2 = iconst.i32 1
+    brz v2, ebb1(v1)
+    jump ebb2
+
+ebb1(v3: i32):
+    return v3
+
+ebb2:
+    jump ebb1(v2)
+}
--- a/lib/cretonne/src/ir/dfg.rs
+++ b/lib/cretonne/src/ir/dfg.rs
@@ -2,6 +2,7 @@

 use entity::{PrimaryMap, EntityMap};
 use isa::TargetIsa;
+use ir;
 use ir::builder::ReplaceBuilder;
 use ir::extfunc::ExtFuncData;
 use ir::instructions::{InstructionData, CallInfo, BranchInfo};
@@ -315,7 +316,7 @@ impl DataFlowGraph {
 }

 /// Where did a value come from?
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum ValueDef {
    /// Value is the n'th result of an instruction.
    Result(Inst, usize),
@@ -331,6 +332,22 @@ impl ValueDef {
            _ => panic!("Value is not an instruction result"),
        }
    }
+
+    /// Get the program point where this value was defined, by converting `self` with `into()`.
+    pub fn pp(self) -> ir::ExpandedProgramPoint {
+        self.into()
+    }
+
+    /// Get the number component of this definition.
+    ///
+    /// This is the `usize` stored in either the `Result` or the `Param` variant. When multiple
+    /// values are defined at the same program point, it is the index of this value among them.
+    pub fn num(self) -> usize {
+        match self {
+            ValueDef::Result(_, n) |
+            ValueDef::Param(_, n) => n,
+        }
+    }
 }

 // Internal table storage for extended values.
--- a/lib/cretonne/src/regalloc/coalescing.rs
+++ b/lib/cretonne/src/regalloc/coalescing.rs