From 74f8e9a1fdffc7b0fbfa34d5c63ec1928421a286 Mon Sep 17 00:00:00 2001
From: T0b1
Date: Sun, 16 Apr 2023 03:23:36 +0200
Subject: [PATCH] calc live bitmaps

---
 src/ion/fast_alloc.rs | 146 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 143 insertions(+), 3 deletions(-)

diff --git a/src/ion/fast_alloc.rs b/src/ion/fast_alloc.rs
index bfb0234..15460ae 100644
--- a/src/ion/fast_alloc.rs
+++ b/src/ion/fast_alloc.rs
@@ -1,9 +1,11 @@
+use alloc::collections::VecDeque;
 use alloc::format;
 use alloc::vec::Vec;
 use alloc::{string::String, vec};
 use smallvec::{smallvec, SmallVec};
 use std::{convert::TryFrom, println};
 
+use crate::indexset::IndexSet;
 use crate::{
     cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint,
     OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot,
@@ -142,6 +144,8 @@ struct FastAllocState<'a, F: Function> {
     pub vregs: Vec,
     pub pregs: Vec,
     pub blocks: Vec,
+    pub liveins: Vec<IndexSet>,
+    pub liveouts: Vec<IndexSet>,
 
     pub cur_stack_slot_idx: u32,
     pub reftype_vregs_in_pregs_count: u32,
@@ -197,6 +201,9 @@ impl<'a, F: Function> FastAllocState<'a, F> {
         // we need to create the alloc array beforehand because it needs to be sorted by inst index
         // which we cannot guarantee when iterating through the blocks in reverse post-order
+        // TODO: this can be folded into any of the other iterations of the blocks by counting the operand counts for each instruction
+        // globally and writing the op count for each inst into inst_alloc_offsets[idx]
+        // then just iterate inst_alloc_offsets and adjust the indices
        let allocs = {
            let block_count = func.num_blocks();
            let mut cur_idx = 0;
@@ -215,6 +222,8 @@ impl<'a, F: Function> FastAllocState<'a, F> {
             vregs,
             pregs,
             blocks,
+            liveins: Vec::new(),
+            liveouts: Vec::new(),
 
             cur_stack_slot_idx: 0,
             reftype_vregs_in_pregs_count: 0,
@@ -399,7 +408,7 @@ pub fn run<F: Function>(func: &F, mach_env: &MachineEnv) -> Result<Output, RegAllocError>
     Ok(())
 }
 
-fn calc_use_positions<'a, F: Function>(
+// don't inline for better perf stats
+#[inline(never)]
+fn calc_use_positions_and_live_bitmaps<'a, F: Function>(
     state: &mut FastAllocState<'a, F>,
     const_state: &ReadOnlyData,
-) {
+) -> Result<(), RegAllocError> {
+    // TODO: this could be folded into the bitmap calculation by making a
+    // reverse postorder idx -> (block idx, inst_count for all)
+    // mapping
+    //
     // we use a pseudo-counter to have a uniform position for instructions
+    // this takes 0.5-0.8% on average but has maxes of up to 2% of compile time
+    // so if it does not substantially increase compilation performance it should be killed
     let mut cur_pos = 0u32;
     let len = const_state.postorder.len();
     for i in 0..len {
@@ -1072,4 +1089,127 @@ fn calc_use_positions<'a, F: Function>(
         cur_pos += 1;
     }
+
+    calc_live_bitmaps(state, const_state)
+}
+
+struct BlockBitmap {
+    storage: SmallVec<[u64; 2]>,
+}
+
+impl BlockBitmap {
+    fn init(block_count: usize) -> Self {
+        let u64_count = (block_count + 63) / 64;
+        let mut storage = SmallVec::<[u64; 2]>::with_capacity(u64_count);
+        storage.resize(u64_count, 0);
+        Self { storage }
+    }
+
+    // split idx into word index and bit offset before masking
+    fn set(&mut self, idx: usize) {
+        let word = idx / 64;
+        let bit = 1u64 << (idx % 64);
+        self.storage[word] |= bit;
+    }
+
+    fn un_set(&mut self, idx: usize) {
+        let word = idx / 64;
+        let bit = 1u64 << (idx % 64);
+        self.storage[word] &= !bit;
+    }
+
+    fn is_set(&self, idx: usize) -> bool {
+        let word = idx / 64;
+        let bit = 1u64 << (idx % 64);
+        (self.storage[word] & bit) != 0
+    }
+}
+
+// currently a copy from liveranges.rs
+// don't inline for better perf stats
+#[inline(never)]
+fn calc_live_bitmaps<'a, F: Function>(
+    state: &mut FastAllocState<'a, F>,
+    const_state: &ReadOnlyData,
+) -> Result<(), RegAllocError> {
+    state.liveins.resize(state.blocks.len(), IndexSet::new());
+    state.liveouts.resize(state.blocks.len(), IndexSet::new());
+
+    // Run a worklist algorithm to precisely compute liveins and
+    // liveouts.
+    let mut workqueue = VecDeque::new();
+    let mut workqueue_set = BlockBitmap::init(state.liveins.len());
+    // Initialize workqueue with postorder traversal.
+    for &block in &const_state.postorder[..] {
+        workqueue.push_back(block);
+        workqueue_set.set(block.index());
+    }
+
+    while let Some(block) = workqueue.pop_front() {
+        workqueue_set.un_set(block.index());
+        let insns = state.func.block_insns(block);
+
+        trace!("computing liveins for block{}", block.index());
+
+        //state.stats.livein_iterations += 1;
+
+        let mut live = state.liveouts[block.index()].clone();
+        trace!(" -> initial liveout set: {:?}", live);
+
+        // Include outgoing blockparams in the initial live set.
+        if state.func.is_branch(insns.last()) {
+            for i in 0..state.func.block_succs(block).len() {
+                for &param in state.func.branch_blockparams(block, insns.last(), i) {
+                    live.set(param.vreg(), true);
+                }
+            }
+        }
+
+        for inst in insns.rev().iter() {
+            for pos in &[OperandPos::Late, OperandPos::Early] {
+                for op in state.func.inst_operands(inst) {
+                    if op.as_fixed_nonallocatable().is_some() {
+                        continue;
+                    }
+                    if op.pos() == *pos {
+                        let was_live = live.get(op.vreg().vreg());
+                        trace!("op {:?} was_live = {}", op, was_live);
+                        match op.kind() {
+                            OperandKind::Use => {
+                                live.set(op.vreg().vreg(), true);
+                            }
+                            OperandKind::Def => {
+                                live.set(op.vreg().vreg(), false);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        for &blockparam in state.func.block_params(block) {
+            live.set(blockparam.vreg(), false);
+        }
+
+        for &pred in state.func.block_preds(block) {
+            if state.liveouts[pred.index()].union_with(&live) {
+                if !workqueue_set.is_set(pred.index()) {
+                    workqueue_set.set(pred.index());
+                    workqueue.push_back(pred);
+                }
+            }
+        }
+
+        trace!("computed liveins at block{}: {:?}", block.index(), live);
+        state.liveins[block.index()] = live;
+    }
+
+    // Check that there are no liveins to the entry block.
+    if !state.liveins[state.func.entry_block().index()].is_empty() {
+        trace!(
+            "non-empty liveins to entry block: {:?}",
+            state.liveins[state.func.entry_block().index()]
+        );
+        return Err(RegAllocError::EntryLivein);
+    }
+
+    Ok(())
+}
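
For context on the dataflow the patch adds: below is a minimal, self-contained sketch of the same backward liveness fixpoint (walk each block's instructions in reverse, gen on uses, kill on defs, then merge the block's live-in set into every predecessor's live-out set and re-enqueue predecessors whose set grew). It uses plain u64 bitsets and hypothetical `ToyBlock`/`compute_liveness` types rather than regalloc2's `IndexSet`, `Function`, and `BlockBitmap`, and it ignores branch blockparams, so it illustrates the algorithm rather than the crate's API.

```rust
use std::collections::VecDeque;

/// A block in a toy CFG: predecessor indices plus (defs, uses) per
/// instruction, in program order. Vreg numbers are assumed to be < 64 so a
/// single u64 word can serve as the live set.
struct ToyBlock {
    preds: Vec<usize>,
    insts: Vec<(Vec<usize>, Vec<usize>)>, // (defs, uses)
}

/// Backward liveness fixpoint over the toy CFG, mirroring the worklist
/// structure of `calc_live_bitmaps`. Returns (liveins, liveouts) per block.
fn compute_liveness(blocks: &[ToyBlock]) -> (Vec<u64>, Vec<u64>) {
    let mut liveins = vec![0u64; blocks.len()];
    let mut liveouts = vec![0u64; blocks.len()];

    // Seed the worklist with every block; the real code seeds it in postorder
    // so most blocks converge after a single pass.
    let mut workqueue: VecDeque<usize> = (0..blocks.len()).collect();
    let mut in_queue = vec![true; blocks.len()];

    while let Some(b) = workqueue.pop_front() {
        in_queue[b] = false;

        // Start from the block's current live-out set and walk its
        // instructions backwards: defs kill a vreg, uses make it live.
        let mut live = liveouts[b];
        for (defs, uses) in blocks[b].insts.iter().rev() {
            for &d in defs {
                live &= !(1u64 << d);
            }
            for &u in uses {
                live |= 1u64 << u;
            }
        }
        liveins[b] = live;

        // Propagate the live-in set into each predecessor's live-out set and
        // re-enqueue predecessors whose set actually changed.
        for &p in &blocks[b].preds {
            let merged = liveouts[p] | live;
            if merged != liveouts[p] {
                liveouts[p] = merged;
                if !in_queue[p] {
                    in_queue[p] = true;
                    workqueue.push_back(p);
                }
            }
        }
    }

    (liveins, liveouts)
}

fn main() {
    // Block 0 defines v0 and falls through to block 1, which uses v0 and
    // loops back to itself, so v0 must be live-out of block 0 and live-in
    // to block 1, but not live-in to the entry block.
    let blocks = vec![
        ToyBlock { preds: vec![], insts: vec![(vec![0], vec![])] },
        ToyBlock { preds: vec![0, 1], insts: vec![(vec![], vec![0])] },
    ];
    let (liveins, liveouts) = compute_liveness(&blocks);
    assert_eq!(liveins[0], 0);
    assert_eq!(liveouts[0] & 1, 1);
    assert_eq!(liveins[1] & 1, 1);
    println!("liveins = {:?}, liveouts = {:?}", liveins, liveouts);
}
```

Seeding the queue in postorder, as the patch does, means a block's successors are usually processed before the block itself, so live-outs are already populated on the first visit and the fixpoint typically converges in very few passes; the `BlockBitmap` only deduplicates queue membership, addressing bits via `idx / 64` as the word index and `idx % 64` as the bit within that word.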