calc live bitmaps

This commit is contained in:
T0b1
2023-04-16 03:23:36 +02:00
parent d31dbaaa16
commit 74f8e9a1fd

View File

@@ -1,9 +1,11 @@
use alloc::collections::VecDeque;
use alloc::format;
use alloc::vec::Vec;
use alloc::{string::String, vec};
use smallvec::{smallvec, SmallVec};
use std::{convert::TryFrom, println};
use crate::indexset::IndexSet;
use crate::{
cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint,
OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot,
@@ -142,6 +144,8 @@ struct FastAllocState<'a, F: Function> {
pub vregs: Vec<VRegData>,
pub pregs: Vec<PRegData>,
pub blocks: Vec<BlockData>,
pub liveins: Vec<IndexSet>,
pub liveouts: Vec<IndexSet>,
pub cur_stack_slot_idx: u32,
pub reftype_vregs_in_pregs_count: u32,
@@ -197,6 +201,9 @@ impl<'a, F: Function> FastAllocState<'a, F> {
// we need to create the alloc array beforehand because it needs to be sorted by inst index
// which we cannot guarantee when iterating through the blocks in reverse post-order
// TODO: this can be folded into any of the other iterations of the blocks by counting the operand counts for each instruction
// globally and writing the op count for each inst into inst_alloc_offsets[idx]
// then just iterate inst_alloc_offsets and adjust the indices
let allocs = {
let block_count = func.num_blocks();
let mut cur_idx = 0;
@@ -215,6 +222,8 @@ impl<'a, F: Function> FastAllocState<'a, F> {
vregs,
pregs,
blocks,
liveins: Vec::new(),
liveouts: Vec::new(),
cur_stack_slot_idx: 0,
reftype_vregs_in_pregs_count: 0,
@@ -399,7 +408,7 @@ pub fn run<F: Function>(func: &F, mach_env: &MachineEnv) -> Result<Output, RegAl
let mut state = FastAllocState::init(func, mach_env);
let const_state = ReadOnlyData::init(func, mach_env);
calc_use_positions(&mut state, &const_state);
calc_use_positions_and_live_bitmaps(&mut state, &const_state);
state.blocks[func.entry_block().index()].params_allocated = true;
@@ -1030,11 +1039,19 @@ fn handle_out_block_params<'a, F: Function>(
Ok(())
}
fn calc_use_positions<'a, F: Function>(
// don't inline for better perf stats
#[inline(never)]
fn calc_use_positions_and_live_bitmaps<'a, F: Function>(
state: &mut FastAllocState<'a, F>,
const_state: &ReadOnlyData,
) {
) -> Result<(), RegAllocError> {
// TODO: this could be folded into the bitmap calculation by building a
// reverse postorder idx -> (block idx, inst_count for all)
// mapping.
//
// We use a pseudo-counter to give every instruction a uniform position.
// This takes 0.5-0.8% of compile time on average, with peaks of up to 2%,
// so it should be removed if it does not substantially improve compilation performance.
let mut cur_pos = 0u32;
let len = const_state.postorder.len();
for i in 0..len {
@@ -1072,4 +1089,127 @@ fn calc_use_positions<'a, F: Function>(
cur_pos += 1;
}
calc_live_bitmaps(state, const_state)
}
/// Fixed-capacity bitmap with one flag per index, backed by 64-bit words.
///
/// The liveness worklist uses it to remember which blocks are currently queued.
struct BlockBitmap {
    /// Bit `i % 64` of word `i / 64` holds the flag for index `i`.
    storage: SmallVec<[u64; 2]>,
}

impl BlockBitmap {
    /// Number of flags stored in one word of `storage`.
    const WORD_BITS: usize = 64;

    /// Creates a bitmap with room for `block_count` flags, all cleared.
    fn init(block_count: usize) -> Self {
        let u64_count = (block_count + Self::WORD_BITS - 1) / Self::WORD_BITS;
        let mut storage = SmallVec::<[u64; 2]>::with_capacity(u64_count);
        storage.resize(u64_count, 0);
        Self { storage }
    }

    /// Splits a flag index into its word index and the mask selecting its bit.
    ///
    /// Both parts are derived from the *original* index. Computing the mask
    /// from the already-divided word index would make all 64 indices of a
    /// word share a single bit.
    fn locate(idx: usize) -> (usize, u64) {
        (idx / Self::WORD_BITS, 1u64 << (idx % Self::WORD_BITS))
    }

    /// Sets the flag for `idx`.
    ///
    /// Panics if `idx` lies beyond the capacity requested in `init`.
    fn set(&mut self, idx: usize) {
        let (word, mask) = Self::locate(idx);
        self.storage[word] |= mask;
    }

    /// Clears the flag for `idx`.
    ///
    /// Panics if `idx` lies beyond the capacity requested in `init`.
    fn un_set(&mut self, idx: usize) {
        let (word, mask) = Self::locate(idx);
        self.storage[word] &= !mask;
    }

    /// Returns whether the flag for `idx` is currently set.
    ///
    /// Panics if `idx` lies beyond the capacity requested in `init`.
    fn is_set(&self, idx: usize) -> bool {
        let (word, mask) = Self::locate(idx);
        (self.storage[word] & mask) != 0
    }
}
// Computes the per-block livein/liveout vreg sets and stores them in
// `state.liveins` / `state.liveouts`.
// For now this mirrors the implementation in liveranges.rs.
//
// Returns `RegAllocError::EntryLivein` if any vreg is live into the entry block.
//
// Kept out of line so that it shows up as its own entry in perf stats.
#[inline(never)]
fn calc_live_bitmaps<'a, F: Function>(
    state: &mut FastAllocState<'a, F>,
    const_state: &ReadOnlyData,
) -> Result<(), RegAllocError> {
    let block_count = state.blocks.len();
    state.liveins.resize(block_count, IndexSet::new());
    state.liveouts.resize(block_count, IndexSet::new());

    // Worklist algorithm: every block is processed at least once (seeded in
    // postorder) and re-queued whenever the liveout set of one of its
    // predecessors grows. `queued` tracks which blocks sit in `pending` so
    // that no block is enqueued twice at the same time.
    let mut queued = BlockBitmap::init(block_count);
    let mut pending = VecDeque::new();
    for &seed in const_state.postorder[..].iter() {
        queued.set(seed.index());
        pending.push_back(seed);
    }

    while let Some(block) = pending.pop_front() {
        let block_idx = block.index();
        queued.un_set(block_idx);

        let insns = state.func.block_insns(block);

        trace!("computing liveins for block{}", block_idx);

        // Start from the current liveout set and walk the block backwards.
        let mut live = state.liveouts[block_idx].clone();
        trace!(" -> initial liveout set: {:?}", live);

        // Arguments passed to successor block params are used by the
        // terminating branch, so they are live at the end of the block.
        let terminator = insns.last();
        if state.func.is_branch(terminator) {
            let succ_count = state.func.block_succs(block).len();
            for succ_idx in 0..succ_count {
                for &param in state.func.branch_blockparams(block, terminator, succ_idx) {
                    live.set(param.vreg(), true);
                }
            }
        }

        // Visit instructions in reverse; within an instruction handle the
        // late operands before the early ones.
        for inst in insns.rev().iter() {
            for pos in &[OperandPos::Late, OperandPos::Early] {
                for op in state.func.inst_operands(inst) {
                    if op.as_fixed_nonallocatable().is_some() || op.pos() != *pos {
                        continue;
                    }

                    let vreg = op.vreg().vreg();
                    let was_live = live.get(vreg);
                    trace!("op {:?} was_live = {}", op, was_live);

                    // A use makes the vreg live above this point, a def ends
                    // its liveness.
                    let live_above = match op.kind() {
                        OperandKind::Use => true,
                        OperandKind::Def => false,
                    };
                    live.set(vreg, live_above);
                }
            }
        }

        // Block params are defined at the top of the block and therefore are
        // not live into it.
        for &blockparam in state.func.block_params(block) {
            live.set(blockparam.vreg(), false);
        }

        // Propagate the livein set into every predecessor's liveout set and
        // re-queue the predecessors whose set changed.
        for &pred in state.func.block_preds(block) {
            let pred_idx = pred.index();
            let grew = state.liveouts[pred_idx].union_with(&live);
            if grew && !queued.is_set(pred_idx) {
                queued.set(pred_idx);
                pending.push_back(pred);
            }
        }

        trace!("computed liveins at block{}: {:?}", block_idx, live);
        state.liveins[block_idx] = live;
    }

    // Nothing may be live into the entry block.
    let entry_idx = state.func.entry_block().index();
    if state.liveins[entry_idx].is_empty() {
        return Ok(());
    }

    trace!(
        "non-empty liveins to entry block: {:?}",
        state.liveins[entry_idx]
    );
    Err(RegAllocError::EntryLivein)
}