calc live bitmaps
This commit is contained in:
@@ -1,9 +1,11 @@
|
|||||||
|
use alloc::collections::VecDeque;
|
||||||
use alloc::format;
|
use alloc::format;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
use alloc::{string::String, vec};
|
use alloc::{string::String, vec};
|
||||||
use smallvec::{smallvec, SmallVec};
|
use smallvec::{smallvec, SmallVec};
|
||||||
use std::{convert::TryFrom, println};
|
use std::{convert::TryFrom, println};
|
||||||
|
|
||||||
|
use crate::indexset::IndexSet;
|
||||||
use crate::{
|
use crate::{
|
||||||
cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint,
|
cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint,
|
||||||
OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot,
|
OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot,
|
||||||
@@ -142,6 +144,8 @@ struct FastAllocState<'a, F: Function> {
|
|||||||
pub vregs: Vec<VRegData>,
|
pub vregs: Vec<VRegData>,
|
||||||
pub pregs: Vec<PRegData>,
|
pub pregs: Vec<PRegData>,
|
||||||
pub blocks: Vec<BlockData>,
|
pub blocks: Vec<BlockData>,
|
||||||
|
pub liveins: Vec<IndexSet>,
|
||||||
|
pub liveouts: Vec<IndexSet>,
|
||||||
|
|
||||||
pub cur_stack_slot_idx: u32,
|
pub cur_stack_slot_idx: u32,
|
||||||
pub reftype_vregs_in_pregs_count: u32,
|
pub reftype_vregs_in_pregs_count: u32,
|
||||||
@@ -197,6 +201,9 @@ impl<'a, F: Function> FastAllocState<'a, F> {
|
|||||||
|
|
||||||
// we need to create the alloc array beforehand because it needs to be sorted by inst index
|
// we need to create the alloc array beforehand because it needs to be sorted by inst index
|
||||||
// which we cannot guarantee when iterating through the blocks in reverse post-order
|
// which we cannot guarantee when iterating through the blocks in reverse post-order
|
||||||
|
// TODO: this can be folded into any of the other passes over the blocks by counting the operands of each instruction
|
||||||
|
// globally and writing the op count for each inst into inst_alloc_offsets[idx]
|
||||||
|
// then just iterate inst_alloc_offsets and adjust the indices
|
||||||
let allocs = {
|
let allocs = {
|
||||||
let block_count = func.num_blocks();
|
let block_count = func.num_blocks();
|
||||||
let mut cur_idx = 0;
|
let mut cur_idx = 0;
|
||||||
@@ -215,6 +222,8 @@ impl<'a, F: Function> FastAllocState<'a, F> {
|
|||||||
vregs,
|
vregs,
|
||||||
pregs,
|
pregs,
|
||||||
blocks,
|
blocks,
|
||||||
|
liveins: Vec::new(),
|
||||||
|
liveouts: Vec::new(),
|
||||||
|
|
||||||
cur_stack_slot_idx: 0,
|
cur_stack_slot_idx: 0,
|
||||||
reftype_vregs_in_pregs_count: 0,
|
reftype_vregs_in_pregs_count: 0,
|
||||||
@@ -399,7 +408,7 @@ pub fn run<F: Function>(func: &F, mach_env: &MachineEnv) -> Result<Output, RegAl
|
|||||||
let mut state = FastAllocState::init(func, mach_env);
|
let mut state = FastAllocState::init(func, mach_env);
|
||||||
let const_state = ReadOnlyData::init(func, mach_env);
|
let const_state = ReadOnlyData::init(func, mach_env);
|
||||||
|
|
||||||
calc_use_positions(&mut state, &const_state);
|
calc_use_positions_and_live_bitmaps(&mut state, &const_state);
|
||||||
|
|
||||||
state.blocks[func.entry_block().index()].params_allocated = true;
|
state.blocks[func.entry_block().index()].params_allocated = true;
|
||||||
|
|
||||||
@@ -1030,11 +1039,19 @@ fn handle_out_block_params<'a, F: Function>(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calc_use_positions<'a, F: Function>(
|
// don't inline for better perf stats
|
||||||
|
#[inline(never)]
|
||||||
|
fn calc_use_positions_and_live_bitmaps<'a, F: Function>(
|
||||||
state: &mut FastAllocState<'a, F>,
|
state: &mut FastAllocState<'a, F>,
|
||||||
const_state: &ReadOnlyData,
|
const_state: &ReadOnlyData,
|
||||||
) {
|
) -> Result<(), RegAllocError> {
|
||||||
|
// TODO: this could be folded into the bitmap calculation by making a
|
||||||
|
// reverse postorder idx -> (block idx, inst_count for all)
|
||||||
|
// mapping
|
||||||
|
//
|
||||||
// we use a pseudo-counter to have a uniform position for instructions
|
// we use a pseudo-counter to have a uniform position for instructions
|
||||||
|
// this takes 0.5-0.8% on average but has maxes of up to 2% of compile time
|
||||||
|
// so it should be removed unless it substantially improves compilation performance
|
||||||
let mut cur_pos = 0u32;
|
let mut cur_pos = 0u32;
|
||||||
let len = const_state.postorder.len();
|
let len = const_state.postorder.len();
|
||||||
for i in 0..len {
|
for i in 0..len {
|
||||||
@@ -1072,4 +1089,127 @@ fn calc_use_positions<'a, F: Function>(
|
|||||||
|
|
||||||
cur_pos += 1;
|
cur_pos += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
calc_live_bitmaps(state, const_state)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BlockBitmap {
|
||||||
|
storage: SmallVec<[u64; 2]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BlockBitmap {
|
||||||
|
fn init(block_count: usize) -> Self {
|
||||||
|
let u64_count = (block_count + 63) / 64;
|
||||||
|
let mut storage = SmallVec::<[u64; 2]>::with_capacity(u64_count);
|
||||||
|
storage.resize(u64_count, 0);
|
||||||
|
Self { storage }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set(&mut self, idx: usize) {
|
||||||
|
let idx = idx / 64;
|
||||||
|
let bit = 1u64 << (idx % 64);
|
||||||
|
self.storage[idx] |= bit;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn un_set(&mut self, idx: usize) {
|
||||||
|
let idx = idx / 64;
|
||||||
|
let bit = 1u64 << (idx % 64);
|
||||||
|
self.storage[idx] &= !bit;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_set(&mut self, idx: usize) -> bool {
|
||||||
|
let idx = idx / 64;
|
||||||
|
let bit = 1u64 << (idx % 64);
|
||||||
|
(self.storage[idx] & bit) != 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// currently, copy from liveranges.rs
|
||||||
|
// don't inline for better perf stats
|
||||||
|
#[inline(never)]
|
||||||
|
fn calc_live_bitmaps<'a, F: Function>(
|
||||||
|
state: &mut FastAllocState<'a, F>,
|
||||||
|
const_state: &ReadOnlyData,
|
||||||
|
) -> Result<(), RegAllocError> {
|
||||||
|
state.liveins.resize(state.blocks.len(), IndexSet::new());
|
||||||
|
state.liveouts.resize(state.blocks.len(), IndexSet::new());
|
||||||
|
|
||||||
|
// Run a worklist algorithm to precisely compute liveins and
|
||||||
|
// liveouts.
|
||||||
|
let mut workqueue = VecDeque::new();
|
||||||
|
let mut workqueue_set = BlockBitmap::init(state.liveins.len());
|
||||||
|
// Initialize workqueue with postorder traversal.
|
||||||
|
for &block in &const_state.postorder[..] {
|
||||||
|
workqueue.push_back(block);
|
||||||
|
workqueue_set.set(block.index());
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(block) = workqueue.pop_front() {
|
||||||
|
workqueue_set.un_set(block.index());
|
||||||
|
let insns = state.func.block_insns(block);
|
||||||
|
|
||||||
|
trace!("computing liveins for block{}", block.index());
|
||||||
|
|
||||||
|
//state.stats.livein_iterations += 1;
|
||||||
|
|
||||||
|
let mut live = state.liveouts[block.index()].clone();
|
||||||
|
trace!(" -> initial liveout set: {:?}", live);
|
||||||
|
|
||||||
|
// Include outgoing blockparams in the initial live set.
|
||||||
|
if state.func.is_branch(insns.last()) {
|
||||||
|
for i in 0..state.func.block_succs(block).len() {
|
||||||
|
for ¶m in state.func.branch_blockparams(block, insns.last(), i) {
|
||||||
|
live.set(param.vreg(), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for inst in insns.rev().iter() {
|
||||||
|
for pos in &[OperandPos::Late, OperandPos::Early] {
|
||||||
|
for op in state.func.inst_operands(inst) {
|
||||||
|
if op.as_fixed_nonallocatable().is_some() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if op.pos() == *pos {
|
||||||
|
let was_live = live.get(op.vreg().vreg());
|
||||||
|
trace!("op {:?} was_live = {}", op, was_live);
|
||||||
|
match op.kind() {
|
||||||
|
OperandKind::Use => {
|
||||||
|
live.set(op.vreg().vreg(), true);
|
||||||
|
}
|
||||||
|
OperandKind::Def => {
|
||||||
|
live.set(op.vreg().vreg(), false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for &blockparam in state.func.block_params(block) {
|
||||||
|
live.set(blockparam.vreg(), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
for &pred in state.func.block_preds(block) {
|
||||||
|
if state.liveouts[pred.index()].union_with(&live) {
|
||||||
|
if !workqueue_set.is_set(pred.index()) {
|
||||||
|
workqueue_set.set(pred.index());
|
||||||
|
workqueue.push_back(pred);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
trace!("computed liveins at block{}: {:?}", block.index(), live);
|
||||||
|
state.liveins[block.index()] = live;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that there are no liveins to the entry block.
|
||||||
|
if !state.liveins[state.func.entry_block().index()].is_empty() {
|
||||||
|
trace!(
|
||||||
|
"non-empty liveins to entry block: {:?}",
|
||||||
|
state.liveins[state.func.entry_block().index()]
|
||||||
|
);
|
||||||
|
return Err(RegAllocError::EntryLivein);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user