Reimplement coalescer following the Budimlic paper.

The old coalescing algorithm had algorithmic complexity issues when
dealing with large virtual registers. Reimplement it using a proper
union-find algorithm, so that virtual registers that are
interference-free need only one pass through the dominator forests.

Virtual registers that do have interference are split, and new
registers are built.

This pass is about twice as fast as the old one when dealing with
complex virtual registers.
This commit is contained in:
Jakob Stoklund Olesen
2018-01-09 10:33:02 -08:00
parent 16ac4f65b3
commit d1f236b00a
4 changed files with 604 additions and 485 deletions

View File

@@ -40,9 +40,8 @@ ebb1(v10: i32):
function %dualuse(i32) -> i32 {
ebb0(v0: i32):
; check: $(cp1=$V) = copy $v0
; nextln: brnz $v0, $ebb1($v0, $cp1)
; nextln: brnz $v0, $ebb1($cp1, $v0)
brnz v0, ebb1(v0, v0)
; not: copy
v1 = iadd_imm v0, 7
v2 = iadd_imm v1, 56
jump ebb1(v1, v2)
@@ -56,14 +55,15 @@ ebb1(v10: i32, v11: i32):
; The interference can be broken with a copy at either branch.
function %interference(i32) -> i32 {
ebb0(v0: i32):
; check: $(cp0=$V) = copy $v0
; not: copy
; check: brnz $v0, ebb1($cp0)
brnz v0, ebb1(v0)
v1 = iadd_imm v0, 7
; v1 and v0 interfere here:
v2 = iadd_imm v0, 8
; check: $(cp1=$V) = copy $v1
; not: copy
; check: jump $ebb1($cp1)
; check: jump $ebb1($v1)
jump ebb1(v1)
ebb1(v10: i32):
@@ -75,7 +75,6 @@ ebb1(v10: i32):
; A loop where one induction variable is used as a backedge argument.
function %fibonacci(i32) -> i32 {
ebb0(v0: i32):
; not: copy
v1 = iconst.i32 1
v2 = iconst.i32 2
jump ebb1(v1, v2)
@@ -103,8 +102,7 @@ function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 {
ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32):
; check: fill v8
; not: v8
brnz v0, ebb1(v8)
jump ebb1(v7)
jump ebb1(v8)
ebb1(v10: i32):
v11 = iadd_imm v10, 1

View File

@@ -0,0 +1,37 @@
test regalloc
isa riscv
; Here, the coalescer initially builds vreg0 = [v1, v2, v3]
;
; There's interference between v1 and v2 at the brz instruction. Isolating v2 is not going to
; resolve that conflict since v1 will just interfere with the inserted copy too.
;function %c1(i32) -> i32 {
;ebb0(v0: i32):
; v1 = iadd_imm v0, 1
; v2 = iconst.i32 1
; brz v1, ebb1(v2)
; jump ebb2
;
;ebb1(v3: i32):
; return v3
;
;ebb2:
; jump ebb1(v1)
;}
; Same thing with v1 and v2 swapped to reverse the order of definitions.
function %c2(i32) -> i32 {
ebb0(v0: i32):
v1 = iadd_imm v0, 1
v2 = iconst.i32 1
brz v2, ebb1(v1)
jump ebb2
ebb1(v3: i32):
return v3
ebb2:
jump ebb1(v2)
}

View File

@@ -2,6 +2,7 @@
use entity::{PrimaryMap, EntityMap};
use isa::TargetIsa;
use ir;
use ir::builder::ReplaceBuilder;
use ir::extfunc::ExtFuncData;
use ir::instructions::{InstructionData, CallInfo, BranchInfo};
@@ -315,7 +316,7 @@ impl DataFlowGraph {
}
/// Where did a value come from?
#[derive(Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValueDef {
/// Value is the n'th result of an instruction.
Result(Inst, usize),
@@ -331,6 +332,22 @@ impl ValueDef {
_ => panic!("Value is not an instruction result"),
}
}
/// Get the program point where the value was defined.
pub fn pp(self) -> ir::ExpandedProgramPoint {
self.into()
}
/// Get the number component of this definition.
///
/// When multiple values are defined at the same program point, this indicates the index of
/// this value.
pub fn num(self) -> usize {
match self {
ValueDef::Result(_, n) |
ValueDef::Param(_, n) => n,
}
}
}
// Internal table storage for extended values.

File diff suppressed because it is too large Load Diff