// regalloc2/src/ion/fast_alloc.rs
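//! A fast, single-pass register allocator.
//!
//! The strategy implemented below: every value that outlives an instruction
//! lives in a stack slot; registers are assigned greedily per instruction in
//! three operand passes (fixed/stack constraints, then non-fixed uses and
//! early defs, then late defs and reuses), with spills and reloads emitted as
//! `Edit::Move`s around each instruction. Blocks are visited in reverse
//! postorder, and block parameters are passed through stack slots.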
use alloc::collections::VecDeque;
use alloc::format;
use alloc::vec::Vec;
use alloc::{string::String, vec};
use smallvec::{smallvec, SmallVec};
use core::convert::TryFrom;
use crate::indexset::IndexSet;
use crate::{
cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint,
OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot,
VReg,
};
use crate::{domtree, postorder, FxHashSet, InstPosition};
use super::data_structures::u64_key;
use super::Stats;
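/// Per-vreg allocation state: the preg currently holding the value (if any),
/// its spill slot, the defining block, whether it is a reference-type vreg,
/// and the precomputed, ascending list of its global use positions.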
#[derive(Default, Clone)]
struct VRegData {
pub preg: Option<PReg>,
pub slot_idx: Option<u32>,
pub def_block: Option<Block>,
pub reftype: bool,
// use information
pub cur_use_idx: u32,
pub uses: SmallVec<[u32; 8]>,
}
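/// Per-preg state: the vreg currently held (if any) and whether this preg is
/// one of the fixed stack pseudo-registers from the machine environment.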
#[derive(Default, Clone, Copy)]
struct PRegData {
pub vreg: Option<VReg>,
pub stack_pseudo: bool,
}
#[derive(Default, Clone, Copy)]
struct BlockData {
pub reg_allocated: bool,
pub params_allocated: bool,
}
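/// Immutable per-function data: postorder and dominator tree of the CFG, plus
/// the per-class register probe order (preferred registers first).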
struct ReadOnlyData {
pub postorder: Vec<Block>,
pub idom: Vec<Block>,
pub reg_order_int: Vec<PReg>,
pub reg_order_float: Vec<PReg>,
}
impl ReadOnlyData {
pub fn init<F: Function>(func: &F, mach_env: &MachineEnv) -> Self {
let reg_order_int = {
let class = RegClass::Int as usize;
let amount = mach_env.preferred_regs_by_class[class].len()
+ mach_env.non_preferred_regs_by_class[class].len();
let mut reg_order = Vec::with_capacity(amount);
reg_order.extend_from_slice(&mach_env.preferred_regs_by_class[class]);
reg_order.extend_from_slice(&mach_env.non_preferred_regs_by_class[class]);
reg_order
};
let reg_order_float = {
let class = RegClass::Float as usize;
let amount = mach_env.preferred_regs_by_class[class].len()
+ mach_env.non_preferred_regs_by_class[class].len();
let mut reg_order = Vec::with_capacity(amount);
reg_order.extend_from_slice(&mach_env.preferred_regs_by_class[class]);
reg_order.extend_from_slice(&mach_env.non_preferred_regs_by_class[class]);
reg_order
};
let postorder = postorder::calculate(func.num_blocks(), func.entry_block(), |b| {
func.block_succs(b)
});
let idom = domtree::calculate(
func.num_blocks(),
|b| func.block_preds(b),
&postorder,
func.entry_block(),
);
Self {
reg_order_int,
reg_order_float,
postorder,
idom,
}
}
pub fn reg_order(&self, class: RegClass) -> &[PReg] {
match class {
RegClass::Int => &self.reg_order_int,
RegClass::Float => &self.reg_order_float,
}
}
    fn calc_preorder<F: Function>(func: &F) -> Vec<Block> {
        let entry = func.entry_block();
        let mut ret = vec![entry];
        // track visited blocks so join points and loop back-edges are not
        // pushed (or, worse, looped over) repeatedly
        let mut visited = FxHashSet::default();
        visited.insert(entry);
        struct State<'a> {
            succs: &'a [Block],
            next_succ: usize,
        }
        let mut stack: SmallVec<[State; 64]> = smallvec![];
        stack.push(State {
            succs: func.block_succs(entry),
            next_succ: 0,
        });
        while let Some(state) = stack.last_mut() {
            if state.next_succ >= state.succs.len() {
                stack.pop();
                continue;
            }
            let block = state.succs[state.next_succ];
            state.next_succ += 1;
            if !visited.insert(block) {
                continue;
            }
            ret.push(block);
            let succs = func.block_succs(block);
            if !succs.is_empty() {
                stack.push(State {
                    succs,
                    next_succ: 0,
                });
            }
        }
        ret
    }
}
// https://burtleburtle.net/bob/rand/smallprng.html
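// used to pick a pseudo-random register out of the free-for-all pool when no
// eviction candidate with a known next use exists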
struct PRNG {
a: u64,
b: u64,
c: u64,
d: u64,
}
impl PRNG {
fn new(seed: u64) -> Self {
Self {
a: 0xf1ea5eed,
b: seed,
c: seed,
d: seed,
}
}
    fn val(&mut self) -> u64 {
        // the generator relies on modular arithmetic, so use wrapping ops to
        // avoid overflow panics in debug builds
        let e = self.a.wrapping_sub(PRNG::rot(self.b, 27));
        self.a = self.b ^ PRNG::rot(self.c, 17);
        self.b = self.c.wrapping_add(self.d);
        self.c = self.d.wrapping_add(e);
        self.d = e.wrapping_add(self.a);
        self.d
    }
    fn rot(x: u64, k: u64) -> u64 {
        // rotate left in 64 bits; the 32-bit original this was ported from
        // used `32 - k` here, which is wrong for u64
        (x << k) | (x >> (64 - k))
    }
}
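/// All mutable allocator state for a single function: per-vreg, per-preg and
/// per-block data, liveness bitmaps, the growing edit list, and the output
/// allocation array indexed via `inst_alloc_offsets`.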
struct FastAllocState<'a, F: Function> {
pub vregs: Vec<VRegData>,
pub pregs: Vec<PRegData>,
pub blocks: Vec<BlockData>,
pub liveins: Vec<IndexSet>,
pub liveouts: Vec<IndexSet>,
pub cur_stack_slot_idx: u32,
pub reftype_vregs_in_pregs_count: u32,
pub stack_slot_count_int: u8,
pub stack_slot_count_float: u8,
pub cur_inst_pos: usize,
pub allocs: Vec<Allocation>,
pub inst_alloc_offsets: Vec<u32>,
pub edits: Vec<(ProgPoint, Edit)>,
pub safepoint_slots: Vec<(ProgPoint, Allocation)>,
pub reftype_vregs: &'a [VReg],
pub func: &'a F,
pub mach_env: &'a MachineEnv,
pub prng: PRNG,
}
impl<'a, F: Function> FastAllocState<'a, F> {
pub fn init(func: &'a F, mach_env: &'a MachineEnv) -> Self {
let reftype_vregs = func.reftype_vregs();
let vregs = {
let mut vregs = Vec::with_capacity(func.num_vregs());
vregs.resize(func.num_vregs(), VRegData::default());
for vreg in reftype_vregs {
vregs[vreg.vreg()].reftype = true;
}
vregs
};
let pregs = {
let mut pregs = Vec::with_capacity(PReg::NUM_INDEX);
pregs.resize(PReg::NUM_INDEX, PRegData::default());
for preg in &mach_env.fixed_stack_slots {
trace!("{} is a stack pseudo", preg);
pregs[preg.index()].stack_pseudo = true;
}
pregs
};
let blocks = {
let mut blocks = Vec::with_capacity(func.num_blocks());
blocks.resize(func.num_blocks(), BlockData::default());
blocks
};
trace!(
"Num Insts: {} Num Blocks: {}",
func.num_insts(),
func.num_blocks()
);
let mut inst_alloc_offsets = Vec::with_capacity(func.num_insts());
inst_alloc_offsets.resize(func.num_insts(), 0);
// we need to create the alloc array beforehand because it needs to be sorted by inst index
// which we cannot guarantee when iterating through the blocks in reverse post-order
        // TODO: this could be folded into any of the other iterations over the blocks:
        // count the operands of each instruction globally, writing each inst's operand
        // count into inst_alloc_offsets[idx], then iterate inst_alloc_offsets and turn
        // the counts into running offsets
let allocs = {
let block_count = func.num_blocks();
let mut cur_idx = 0;
for i in 0..block_count {
for inst in func.block_insns(Block::new(i)).iter() {
inst_alloc_offsets[inst.index()] = cur_idx as u32;
cur_idx += func.inst_operands(inst).len();
}
}
let mut allocs = Vec::with_capacity(cur_idx);
allocs.resize(cur_idx, Allocation::none());
allocs
};
trace!("InstAllocOffsets: {:?}", inst_alloc_offsets);
trace!("Allocs Len: {}", allocs.len());
let prng = PRNG::new(
(blocks.len() as u64) << 48 | (vregs.len() as u64) << 32 | (allocs.len() as u64),
);
Self {
vregs,
pregs,
blocks,
liveins: Vec::new(),
liveouts: Vec::new(),
cur_stack_slot_idx: 0,
reftype_vregs_in_pregs_count: 0,
cur_inst_pos: 0,
            stack_slot_count_int: u8::try_from(func.spillslot_size(RegClass::Int))
                .expect("spillslot size does not fit in u8"),
            stack_slot_count_float: u8::try_from(func.spillslot_size(RegClass::Float))
                .expect("spillslot size does not fit in u8"),
allocs,
inst_alloc_offsets,
edits: Vec::new(),
safepoint_slots: Vec::new(),
reftype_vregs,
func,
mach_env,
prng,
}
}
pub fn get_or_alloc_stack_slot(&mut self, vreg: VReg) -> u32 {
if let Some(idx) = self.vregs[vreg.vreg()].slot_idx {
return idx;
}
self.alloc_stack_slot(vreg)
}
pub fn alloc_stack_slot(&mut self, vreg: VReg) -> u32 {
let data = &mut self.vregs[vreg.vreg()];
if data.slot_idx.is_some() {
panic!(
"Trying to allocate already allocated stack slot for {}",
vreg
);
}
let size = if vreg.class() == RegClass::Int {
self.stack_slot_count_int
} else {
self.stack_slot_count_float
};
let idx = self.cur_stack_slot_idx;
trace!("Allocated slot {} for {}", idx, vreg);
self.cur_stack_slot_idx += size as u32;
data.slot_idx = Some(idx);
idx
}
pub fn create_stack_slot(&mut self, class: RegClass) -> u32 {
let size = if class == RegClass::Int {
self.stack_slot_count_int
} else {
self.stack_slot_count_float
};
let idx = self.cur_stack_slot_idx;
self.cur_stack_slot_idx += size as u32;
idx
}
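    /// Ensures `vreg` is available in `preg` at `pos`: a no-op if it is
    /// already there, a reg->reg move if it currently lives in another preg,
    /// and a stack->reg reload (the vreg must have a slot) otherwise.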
pub fn move_to_preg(&mut self, vreg: VReg, preg: PReg, pos: ProgPoint) {
trace!("Move {} to {} at {:?}", vreg, preg, pos);
if let Some(cur_preg) = &self.vregs[vreg.vreg()].preg {
if *cur_preg == preg {
trace!("{} already in target reg", vreg);
return;
}
trace!("Moving directly from {} to {}", cur_preg, preg);
// Do a reg->reg move
self.edits.push((
pos,
Edit::Move {
from: Allocation::reg(*cur_preg),
to: Allocation::reg(preg),
},
));
// TODO: allow multiple pregs for a single vreg?
let pdata = &mut self.pregs[cur_preg.index()];
debug_assert!(pdata.vreg.is_some());
debug_assert_eq!(pdata.vreg.unwrap().vreg(), vreg.vreg());
pdata.vreg = None;
self.pregs[preg.index()].vreg = Some(vreg);
self.vregs[vreg.vreg()].preg = Some(preg);
return;
}
self.clear_preg(preg);
let vdata = &mut self.vregs[vreg.vreg()];
let pdata = &mut self.pregs[preg.index()];
if vdata.slot_idx.is_none() {
panic!("Trying to move from vreg that has no stack slot to preg");
}
let slot = vdata.slot_idx.unwrap() as usize;
trace!("Moving from slot {}", slot);
self.edits.push((
pos,
Edit::Move {
from: Allocation::stack(SpillSlot::new(slot)),
to: Allocation::reg(preg),
},
));
vdata.preg = Some(preg);
pdata.vreg = Some(vreg);
if vdata.reftype {
self.reftype_vregs_in_pregs_count += 1;
}
}
pub fn move_to_stack(&mut self, preg: PReg, vreg: VReg, pos: ProgPoint) {
trace!("Move {} of {} to stack at {:?}", preg, vreg, pos);
let vdata = &mut self.vregs[vreg.vreg()];
let pdata = &mut self.pregs[preg.index()];
if pdata.vreg.is_none() || vdata.preg.is_none() {
panic!("Trying to move from unallocated preg/vreg to stack");
}
debug_assert_eq!(vdata.preg.unwrap(), preg);
debug_assert_eq!(pdata.vreg.unwrap().vreg(), vreg.vreg());
if vdata.slot_idx.is_none() {
panic!("Trying to move to vreg without stack slot");
}
self.edits.push((
pos,
Edit::Move {
from: Allocation::reg(preg),
to: Allocation::stack(SpillSlot::new(vdata.slot_idx.unwrap() as usize)),
},
));
}
pub fn assign_preg(&mut self, preg: PReg, vreg: VReg) {
trace!("Assigning {} to {}", vreg, preg);
        // TODO: assign_preg is sometimes called without making sure the vreg is clear
        // (or in spite of it); need to make sure this is intended behavior
self.clear_preg(preg);
self.pregs[preg.index()].vreg = Some(vreg);
self.vregs[vreg.vreg()].preg = Some(preg);
if self.vregs[vreg.vreg()].reftype {
self.reftype_vregs_in_pregs_count += 1;
}
}
pub fn clear_preg(&mut self, preg: PReg) {
self.clear_preg_idx(preg.index());
}
fn clear_preg_idx(&mut self, preg: usize) {
trace!("Clearing preg {}", preg);
let pdata = &mut self.pregs[preg];
if let Some(vreg) = pdata.vreg {
let vdata = &mut self.vregs[vreg.vreg()];
debug_assert_eq!(vdata.preg.unwrap().index(), preg);
vdata.preg = None;
pdata.vreg = None;
if vdata.reftype {
self.reftype_vregs_in_pregs_count -= 1;
}
}
}
pub fn clear_vreg_from_reg(&mut self, vreg: VReg) {
trace!("Clearing vreg {} from reg", vreg);
let vdata = &mut self.vregs[vreg.vreg()];
if let Some(preg) = vdata.preg {
debug_assert!(self.pregs[preg.index()].vreg.is_some());
debug_assert_eq!(self.pregs[preg.index()].vreg.unwrap().vreg(), vreg.vreg());
self.pregs[preg.index()].vreg = None;
vdata.preg = None;
if vdata.reftype {
self.reftype_vregs_in_pregs_count -= 1;
}
}
}
pub fn clear_reftype_vregs(&mut self) {
if self.reftype_vregs_in_pregs_count == 0 {
return;
}
for i in 0..self.pregs.len() {
            if let Some(vreg) = self.pregs[i].vreg {
let vreg = vreg.vreg();
if self.vregs[vreg].reftype {
self.clear_preg_idx(i);
}
}
}
}
    pub fn vreg_used_at_cur_inst(&self, vreg: VReg) -> bool {
let vdata = &self.vregs[vreg.vreg()];
if vdata.cur_use_idx as usize >= vdata.uses.len() {
return false;
}
vdata.uses[vdata.cur_use_idx as usize] == self.cur_inst_pos as u32
}
pub fn vreg_next_use(&self, vreg: VReg) -> Option<u32> {
let vdata = &self.vregs[vreg.vreg()];
if vdata.cur_use_idx as usize >= vdata.uses.len() {
return None;
}
Some(vdata.uses[vdata.cur_use_idx as usize])
}
}
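/// Entry point: allocates registers for `func` in one reverse-postorder pass
/// over the blocks and returns the allocations, edits and stackmaps.
///
/// A minimal usage sketch (`my_func` stands for some `Function`
/// implementation; the names are illustrative only):
///
/// ```ignore
/// let output = fast_alloc::run(&my_func, &mach_env)?;
/// // output.allocs[output.inst_alloc_offsets[i] as usize..] holds the operand
/// // allocations of instruction i; output.edits holds the spill/reload moves.
/// ```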
pub fn run<F: Function>(func: &F, mach_env: &MachineEnv) -> Result<Output, RegAllocError> {
if func.multi_spillslot_named_by_last_slot() {
panic!("MultiSpillslotIndexPos not supported");
}
let mut state = FastAllocState::init(func, mach_env);
let const_state = ReadOnlyData::init(func, mach_env);
calc_use_positions_and_live_bitmaps(&mut state, &const_state)?;
state.blocks[func.entry_block().index()].params_allocated = true;
    // visit the blocks in reverse postorder
    for &block in const_state.postorder.iter().rev() {
if state.blocks[block.index()].reg_allocated {
trace!("Block {} already allocated. Skipping", block.index());
continue;
}
state.blocks[block.index()].reg_allocated = true;
trace!("Allocating block {}", block.index());
trace!("Allocated pregs:");
for i in 0..state.pregs.len() {
if let Some(vreg) = &state.pregs[i].vreg {
trace!("p{}: {}", i, vreg);
}
}
allocate_block_insts(&mut state, &const_state, block)?;
handle_out_block_params(&mut state, &const_state, block)?;
let last_inst = state.func.block_insns(block).last();
if state.func.is_branch(last_inst) {
state.cur_inst_pos += 1;
}
}
// we do not iterate the blocks in their index order so the order of edits might not be sorted by progpoint
// however it should be nearly sorted
state.edits.sort_by_key(|entry| entry.0);
// these might also not be sorted
state
.safepoint_slots
.sort_unstable_by_key(|(progpoint, slot)| u64_key(progpoint.to_index(), slot.bits()));
if trace_enabled!() {
trace!("Edits:");
for edit in &state.edits {
match edit.1 {
Edit::Move { from, to } => {
trace!("At {:?} from {} to {}", edit.0, from, to);
}
}
}
trace!("StackMaps:");
for entry in &state.safepoint_slots {
trace!("At {:?} at {}", entry.0, entry.1);
}
}
Ok(Output {
num_spillslots: state.cur_stack_slot_idx as usize,
edits: state.edits,
allocs: state.allocs,
inst_alloc_offsets: state.inst_alloc_offsets,
safepoint_slots: state.safepoint_slots,
debug_locations: Vec::new(),
stats: Stats::default(),
})
}
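/// Returns true if `vreg` is known to be dead: it is not in the block's
/// liveout set and either all of its uses are exhausted or (when
/// `save_on_current_use` is false) its use at the current instruction is the
/// last one inside this block.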
fn vreg_killed<'a, F: Function>(
state: &FastAllocState<'a, F>,
inst: Inst,
block: Block,
block_last_inst: usize,
vreg: usize,
// if the vreg is used at the current instruction, count it as not killed
// TODO: this is currently always true but can be used for optimization later on
save_on_current_use: bool,
) -> bool {
let info = &state.vregs[vreg];
let block_after_pos = state.cur_inst_pos + (block_last_inst - inst.index()) + 1;
let cur_use_idx = info.cur_use_idx as usize;
trace!(
"Checking live-status of v{} in {:?} at inst {:?} (CurPos: {} SaveCurPos? {}): Liveout {}, block_after: {}",
vreg,
block,
inst,
state.cur_inst_pos,
save_on_current_use,
state.liveouts[block.index()].get(vreg),
block_after_pos
);
trace!(
"Uses of v{}: {:?}. Currently at {}",
vreg,
info.uses,
info.cur_use_idx
);
if !state.liveouts[block.index()].get(vreg) {
if info.uses.len() <= cur_use_idx {
trace!("Uses exhausted, vreg must be dead");
return true;
}
if info.uses.len() <= cur_use_idx + 1 {
trace!("next use: {}, no use after that", info.uses[cur_use_idx]);
} else {
trace!(
"next use: {}, {}",
info.uses[cur_use_idx],
info.uses[cur_use_idx + 1]
);
}
if !save_on_current_use && info.uses[cur_use_idx] == state.cur_inst_pos as u32 {
if info.uses.len() <= cur_use_idx + 1
|| info.uses[cur_use_idx + 1] >= block_after_pos as u32
{
trace!("v{} is killed", vreg);
return true;
}
}
}
    false
}
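/// Allocates all operands of one block's instructions. Per instruction this
/// runs clobber handling and stackmap generation, three operand passes
/// (fixed/stack constraints, non-fixed uses and early defs, late defs and
/// reuses), use-index advancement, and an edit-order fixup. At the end of the
/// block, live values still in registers are given stack slots if needed and
/// all pregs are cleared.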
fn allocate_block_insts<'a, F: Function>(
state: &mut FastAllocState<'a, F>,
const_state: &ReadOnlyData,
block: Block,
) -> Result<(), RegAllocError> {
let block_last_inst_idx = state.func.block_insns(block).last().index();
for inst in state.func.block_insns(block).iter() {
let edit_start_idx = state.edits.len();
let clobbers = state.func.inst_clobbers(inst);
let operands = state.func.inst_operands(inst);
let req_refs_on_stack = state.func.requires_refs_on_stack(inst);
let alloc_idx = state.inst_alloc_offsets[inst.index()] as usize;
trace!(
"Allocating Inst {} (refs_on_stack: {}, is_ret: {}, is_branch: {}, alloc_idx: {})",
inst.index(),
req_refs_on_stack,
state.func.is_ret(inst),
state.func.is_branch(inst),
alloc_idx
);
if trace_enabled!() {
let mut str = String::new();
for preg in clobbers {
if str.is_empty() {
str.push_str(&format!("{}", preg));
} else {
str.push_str(&format!(", {}", preg));
}
}
trace!("Clobbers: {}", str);
}
        // keep track of which pregs were allocated so we can clear them later on
        // TODO: wouldn't need this if we looked up the inst a vreg was allocated at
        let mut regs_allocated = PRegSet::empty();
        // keep track of which pregs hold late uses/early writes and are therefore
        // ineligible as destinations for late writes
        let mut late_write_disallow_regs = PRegSet::empty();
        // we need to keep track of late defs allocated during the fixed-reg stage,
        // as they may not overlap with late uses and there is no order guarantee
        // for inst_operands
        let mut late_write_regs = PRegSet::empty();
if req_refs_on_stack {
state.clear_reftype_vregs();
let pos = ProgPoint::before(inst);
trace!("Calculating Stackmap for {:?}", pos);
for vreg in state.reftype_vregs {
let data = &state.vregs[vreg.vreg()];
if let Some(slot) = data.slot_idx {
if domtree::dominates(&const_state.idom, data.def_block.unwrap(), block) {
trace!("Marking vreg {} as saved on stack at {}", vreg, slot);
state
.safepoint_slots
.push((pos, Allocation::stack(SpillSlot::new(slot as usize))));
} else {
trace!("Skipping {} as it does not dominate", vreg);
}
}
}
}
for preg in clobbers {
            // TODO: this might save a use that is killed at this inst, I think
let vreg = if let Some(vreg) = &state.pregs[preg.index()].vreg {
*vreg
} else {
continue;
};
if state.vregs[vreg.vreg()].slot_idx.is_some() {
trace!("{} with {} clobbered but saved on stack", preg, vreg);
state.clear_preg(preg);
continue;
}
if !state.vreg_used_at_cur_inst(vreg)
&& vreg_killed(state, inst, block, block_last_inst_idx, vreg.vreg(), true)
{
trace!("{} with {} clobbered but vreg killed", preg, vreg);
state.clear_preg(preg);
continue;
}
state.alloc_stack_slot(vreg);
state.move_to_stack(preg, vreg, ProgPoint::before(inst));
state.clear_preg(preg);
}
        // we handle fixed defs/uses and stack-constrained operands first
        // TODO: if a fixed def comes before a fixed use of the same preg here, it will
        // incorrectly update state!!!
trace!("First alloc pass");
for (i, op) in operands.iter().enumerate() {
let vreg = op.vreg();
trace!("Operand {}: {}", i, op);
if op.as_fixed_nonallocatable().is_some() {
                // it seems Cranelift emits fixed reg uses with invalid vregs; handle them here
                // TODO: treat them like normal vregs by just using last_vreg_index+1 for them?
match op.constraint() {
OperandConstraint::FixedReg(reg) => {
// Save vreg if needed
if let Some(vreg) = state.pregs[reg.index()].vreg {
let vreg_idx = vreg.vreg();
if state.vregs[vreg_idx].slot_idx.is_none()
&& (state.vreg_used_at_cur_inst(vreg)
|| !vreg_killed(
state,
inst,
block,
block_last_inst_idx,
vreg_idx,
true,
))
{
let slot = state.create_stack_slot(reg.class());
state.vregs[vreg_idx].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(reg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
}
}
state.clear_preg(reg);
regs_allocated.add(reg);
state.allocs[alloc_idx + i] = Allocation::reg(reg);
trace!("Chose {} for operand {}", reg, i);
late_write_disallow_regs.add(reg);
}
_ => {
panic!(
"Invalid op constraint {:?} for invalid vreg",
op.constraint()
);
}
}
continue;
}
match op.constraint() {
OperandConstraint::FixedReg(reg) => {
match op.kind() {
OperandKind::Use => {
if req_refs_on_stack && state.vregs[vreg.vreg()].reftype {
panic!("reftype has fixed use when its required to be on stack");
return Err(RegAllocError::TooManyLiveRegs);
}
// TODO: make this proper
if regs_allocated.contains(reg) {
// if the reg was allocated by another early use/write or late use
// OR it is allocated and we have a late use we cannot do a correct allocation
if op.pos() == OperandPos::Late || !late_write_regs.contains(reg) {
panic!("fixed reg late use would overwrite another fixed reg use/early write");
return Err(RegAllocError::TooManyLiveRegs);
}
}
// are we already in the correct reg?
if let Some(cur_preg) = state.vregs[vreg.vreg()].preg {
if cur_preg == reg {
trace!("{} already in target {}", vreg, cur_preg);
state.allocs[alloc_idx + i] = Allocation::reg(cur_preg);
continue;
}
}
// Save vreg if needed
if let Some(vreg) = state.pregs[reg.index()].vreg {
let vreg_idx = vreg.vreg();
if state.vregs[vreg_idx].slot_idx.is_none()
&& (state.vreg_used_at_cur_inst(vreg)
|| !vreg_killed(
state,
inst,
block,
block_last_inst_idx,
vreg_idx,
true,
))
{
let slot = state.create_stack_slot(reg.class());
state.vregs[vreg_idx].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(reg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
}
}
if let Some(cur_preg) = state.vregs[vreg.vreg()].preg {
trace!("Move {} directly from {} to {}", vreg, cur_preg, reg);
// Move from preg to preg
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(cur_preg),
to: Allocation::reg(reg),
},
));
debug_assert_eq!(
state.pregs[cur_preg.index()].vreg.unwrap().vreg(),
vreg.vreg()
);
state.pregs[cur_preg.index()].vreg = None;
state.assign_preg(reg, vreg);
} else {
state.move_to_preg(vreg, reg, ProgPoint::before(inst));
}
state.allocs[alloc_idx + i] = Allocation::reg(reg);
if op.pos() == OperandPos::Late {
if clobbers.contains(reg) {
panic!("fixed late use would be clobbered");
return Err(RegAllocError::TooManyLiveRegs);
}
trace!("Operand {}'s allocation may not be used by a late def", i);
// late uses cannot share a register with late defs
late_write_disallow_regs.add(reg);
}
regs_allocated.add(reg);
trace!("Chose {} for operand {}", reg, i);
}
OperandKind::Def => {
if op.pos() == OperandPos::Late {
if late_write_disallow_regs.contains(reg) {
panic!("fixed late def would overwrite late use/early def");
return Err(RegAllocError::TooManyLiveRegs);
}
late_write_regs.add(reg);
} else {
if state.pregs[reg.index()].vreg.is_some() || clobbers.contains(reg)
{
// early defs cannot share a register with anything and cannot be clobbered
panic!("early def shares reg or is clobbered");
return Err(RegAllocError::TooManyLiveRegs);
}
trace!("Operand {}'s allocation may not be used by a late def", i);
// early defs cannot share a register with late defs
late_write_disallow_regs.add(reg);
}
// Save vreg if needed
if let Some(vreg) = state.pregs[reg.index()].vreg {
let vreg_idx = vreg.vreg();
if state.vregs[vreg_idx].slot_idx.is_none()
&& (op.pos() != OperandPos::Late
&& state.vreg_used_at_cur_inst(vreg)
|| !vreg_killed(
state,
inst,
block,
block_last_inst_idx,
vreg_idx,
true,
))
{
let slot = state.create_stack_slot(reg.class());
state.vregs[vreg_idx].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(reg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
}
}
state.vregs[vreg.vreg()].def_block = Some(block);
state.allocs[alloc_idx + i] = Allocation::reg(reg);
state.assign_preg(reg, vreg);
// some pseudoops use the pseudo stack pregs as defs
if state.pregs[reg.index()].stack_pseudo {
// find preg to use as a tmp register
let mut pregs = PRegSet::empty();
for reg in const_state.reg_order(vreg.class()) {
if state.pregs[reg.index()].vreg.is_some() {
continue;
}
pregs.add(*reg);
}
for op in operands {
match op.constraint() {
OperandConstraint::FixedReg(reg) => {
if op.kind() == OperandKind::Use
&& op.pos() == OperandPos::Early
{
continue;
}
pregs.remove(reg);
}
_ => {}
}
}
if pregs == PRegSet::empty() {
panic!("No way to solve pseudo-stack preg");
}
// Move from pseudoreg to tmp_reg and then to stack
let tmp_reg = pregs.into_iter().next().unwrap();
if let Some(vreg) = state.pregs[tmp_reg.index()].vreg {
// Save vreg if needed
let vreg_idx = vreg.vreg();
if state.vregs[vreg_idx].slot_idx.is_none()
&& (op.pos() != OperandPos::Late
&& state.vreg_used_at_cur_inst(vreg)
|| !vreg_killed(
state,
inst,
block,
block_last_inst_idx,
vreg_idx,
true,
))
{
let slot = state.create_stack_slot(reg.class());
state.vregs[vreg_idx].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(tmp_reg),
to: Allocation::stack(SpillSlot::new(
slot as usize,
)),
},
));
}
state.clear_preg(tmp_reg);
}
state.edits.push((
ProgPoint::after(inst),
Edit::Move {
from: Allocation::reg(reg),
to: Allocation::reg(tmp_reg),
},
));
if state.pregs[reg.index()].vreg.is_some() {
state.clear_preg(reg);
}
state.assign_preg(tmp_reg, vreg);
state.move_to_stack(tmp_reg, vreg, ProgPoint::after(inst));
regs_allocated.add(tmp_reg);
} else {
state.alloc_stack_slot(vreg);
state.move_to_stack(reg, vreg, ProgPoint::after(inst));
regs_allocated.add(reg);
}
trace!("Chose {} for operand {}", reg, i);
}
}
}
OperandConstraint::Stack | OperandConstraint::Any => {
// we allocate Any on the stack for now
match op.kind() {
OperandKind::Use => {
if let Some(slot) = &state.vregs[vreg.vreg()].slot_idx {
state.allocs[alloc_idx + i] =
Allocation::stack(SpillSlot::new(*slot as usize));
trace!("Chose slot {} for operand {}", slot, i);
} else {
return Err(RegAllocError::SSA(vreg, inst));
}
}
OperandKind::Def => {
state.vregs[vreg.vreg()].def_block = Some(block);
let slot = state.alloc_stack_slot(vreg);
state.allocs[alloc_idx + i] =
Allocation::stack(SpillSlot::new(slot as usize));
trace!("Chose slot {} for operand {}", slot, i);
}
}
}
_ => continue,
}
}
// alloc non-fixed uses and early defs in registers
trace!("Second alloc pass");
for (i, op) in operands.iter().enumerate() {
if op.kind() == OperandKind::Def && op.pos() == OperandPos::Late {
continue;
}
trace!("Operand {}: {}", i, op);
let vreg = op.vreg();
if op.as_fixed_nonallocatable().is_some() {
continue;
}
match op.constraint() {
OperandConstraint::Reg => {
                    // Are we already in a reg?
if let Some(cur_preg) = &state.vregs[vreg.vreg()].preg {
assert_eq!(op.kind(), OperandKind::Use);
// Late uses need to survive the instruction
if op.pos() == OperandPos::Early || !clobbers.contains(*cur_preg) {
trace!("{} already in reg {}. Using that", vreg, cur_preg);
state.allocs[alloc_idx + i] = Allocation::reg(*cur_preg);
regs_allocated.add(*cur_preg);
continue;
}
}
// find first non-allocated register
let reg_order = const_state.reg_order(op.class());
let mut allocated = false;
for &reg in reg_order {
if regs_allocated.contains(reg) {
continue;
}
if let Some(cur_vreg) = &state.pregs[reg.index()].vreg {
// we can override the reg if the vreg was killed already
if !vreg_killed(
state,
inst,
block,
block_last_inst_idx,
cur_vreg.vreg(),
true,
) {
continue;
}
state.clear_preg(reg);
}
// reg should not contain anything
debug_assert!(state.pregs[reg.index()].vreg.is_none());
if op.kind() == OperandKind::Use
&& op.pos() == OperandPos::Late
&& clobbers.contains(reg)
{
continue;
}
state.allocs[alloc_idx + i] = Allocation::reg(reg);
regs_allocated.add(reg);
trace!("Chose {} for operand {}", reg, i);
if op.kind() == OperandKind::Use {
if req_refs_on_stack && state.vregs[vreg.vreg()].reftype {
panic!("reftype required to be in reg at safepoint");
return Err(RegAllocError::TooManyLiveRegs);
}
// need to move from stack to reg
state.move_to_preg(vreg, reg, ProgPoint::before(inst));
} else {
// early def
state.vregs[vreg.vreg()].def_block = Some(block);
state.assign_preg(reg, vreg);
state.alloc_stack_slot(vreg);
state.move_to_stack(reg, vreg, ProgPoint::after(inst));
}
allocated = true;
break;
}
if allocated {
continue;
}
trace!("No free register found for operand {}", i);
// No register available
// TODO: first evict pregs that already have a stack slot even if they are used earlier?
let mut evict_candidate = None;
let mut ffa_reg_pool = PRegSet::empty();
for &reg in reg_order {
if regs_allocated.contains(reg) {
continue;
}
if op.kind() == OperandKind::Use
&& op.pos() == OperandPos::Late
&& clobbers.contains(reg)
{
continue;
}
debug_assert!(state.pregs[reg.index()].vreg.is_some());
let vreg = state.pregs[reg.index()].vreg.unwrap();
if let Some(next_use) = state.vreg_next_use(vreg) {
if next_use == state.cur_inst_pos as u32 {
continue;
}
if let Some((_, pos)) = &evict_candidate {
if *pos < next_use {
evict_candidate = Some((reg, next_use));
}
} else {
evict_candidate = Some((reg, next_use));
}
} else {
                            // no known next use; see the ffa_reg_pool handling further below
                            ffa_reg_pool.add(reg);
}
}
// TODO: we need some logic to shuffle assignments around if there is a late use that needs to survive a clobber
// and another reg is available but taken by an early use so it would not be an eviction_candidate
                    if let Some((reg, _)) = evict_candidate {
// Save vreg if needed
{
let vreg = state.pregs[reg.index()].vreg.unwrap();
trace!("Evicting {} with v{}", reg, vreg);
if state.vregs[vreg.vreg()].slot_idx.is_none()
&& !vreg_killed(
state,
inst,
block,
block_last_inst_idx,
vreg.vreg(),
true,
)
{
let slot = state.create_stack_slot(reg.class());
state.vregs[vreg.vreg()].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(reg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
}
}
state.clear_preg(reg);
state.allocs[alloc_idx + i] = Allocation::reg(reg);
regs_allocated.add(reg);
if op.kind() == OperandKind::Use {
if req_refs_on_stack && state.vregs[vreg.vreg()].reftype {
panic!("reftype required to be in reg at safepoint");
return Err(RegAllocError::TooManyLiveRegs);
}
// need to move from stack to reg
state.move_to_preg(vreg, reg, ProgPoint::before(inst));
} else {
// early def
state.vregs[vreg.vreg()].def_block = Some(block);
state.assign_preg(reg, vreg);
state.alloc_stack_slot(vreg);
state.move_to_stack(reg, vreg, ProgPoint::after(inst));
}
trace!("Chose {} for operand {}", reg, i);
} else {
if ffa_reg_pool == PRegSet::empty() {
panic!("Out of registers: {:?}", regs_allocated);
return Err(RegAllocError::TooManyLiveRegs);
}
let preg = 'block: {
let len = ffa_reg_pool.bits.count_ones() as usize;
let mut idx = (state.prng.val() as usize % 128) % len;
for preg in ffa_reg_pool.into_iter() {
if idx == 0 {
break 'block preg;
}
idx -= 1;
}
panic!("I can't do math");
};
trace!("Chose {} from ffa_reg_pool", preg);
{
let vreg = state.pregs[preg.index()].vreg.unwrap();
// need to save vreg if it does not have a slot
if state.vregs[vreg.vreg()].slot_idx.is_none() {
let slot = state.create_stack_slot(preg.class());
state.vregs[vreg.vreg()].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(preg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
}
}
state.clear_preg(preg);
state.allocs[alloc_idx + i] = Allocation::reg(preg);
regs_allocated.add(preg);
if op.kind() == OperandKind::Use {
if req_refs_on_stack && state.vregs[vreg.vreg()].reftype {
panic!("reftype required to be in reg at safepoint");
return Err(RegAllocError::TooManyLiveRegs);
}
// need to move from stack to reg
state.move_to_preg(vreg, preg, ProgPoint::before(inst));
} else {
// early def
state.vregs[vreg.vreg()].def_block = Some(block);
state.assign_preg(preg, vreg);
state.alloc_stack_slot(vreg);
state.move_to_stack(preg, vreg, ProgPoint::after(inst));
}
trace!("Chose {} for operand {}", preg, i);
}
}
OperandConstraint::Reuse(_) => {
panic!("Illegal register constraint reuse for early def or use");
}
_ => {}
}
}
// advance use_idx
for op in operands {
if op.kind() != OperandKind::Use || op.as_fixed_nonallocatable().is_some() {
continue;
}
let vreg_idx = op.vreg().vreg();
let info = &mut state.vregs[vreg_idx];
info.cur_use_idx += 1;
if vreg_killed(state, inst, block, block_last_inst_idx, vreg_idx, true) {
// TODO: clear stack slot
state.clear_vreg_from_reg(op.vreg());
}
}
        // TODO: this currently fixes up the register state for uses that are clobbered,
        // since it is set incorrectly above; it is inefficient, as we could check for
        // this when handling the uses
trace!("Late clobber handling");
for preg in clobbers {
            // TODO: this might save a use that is killed at this inst, I think
let vreg = if let Some(vreg) = &state.pregs[preg.index()].vreg {
*vreg
} else {
continue;
};
if state.vregs[vreg.vreg()].slot_idx.is_some() {
trace!("{} with {} clobbered but saved on stack", preg, vreg);
state.clear_preg(preg);
continue;
}
            // the use indices were already advanced above, so a use at the current
            // inst no longer keeps the vreg alive here
if vreg_killed(state, inst, block, block_last_inst_idx, vreg.vreg(), true) {
trace!("{} with {} clobbered but vreg killed", preg, vreg);
state.clear_preg(preg);
continue;
}
            // TODO: I think this should not be hit, as all we should be clearing here
            // are use assignments, and the vregs that need saving were saved by the
            // check before
state.alloc_stack_slot(vreg);
state.move_to_stack(preg, vreg, ProgPoint::before(inst));
state.clear_preg(preg);
}
// alloc non-fixed late defs and reuse
trace!("Third alloc pass");
for (i, op) in operands.iter().enumerate() {
if op.kind() != OperandKind::Def || op.pos() != OperandPos::Late {
continue;
}
trace!("Operand {}: {}", i, op);
let vreg = op.vreg();
if op.as_fixed_nonallocatable().is_some() {
continue;
}
state.vregs[vreg.vreg()].def_block = Some(block);
match op.constraint() {
OperandConstraint::Reg => {
// find first non-allocated register
let reg_order = const_state.reg_order(op.class());
let mut allocated = false;
for &reg in reg_order {
if regs_allocated.contains(reg) || late_write_disallow_regs.contains(reg) {
continue;
}
if let Some(cur_vreg) = &state.pregs[reg.index()].vreg {
// we can override the reg if the vreg was killed already
if !vreg_killed(
state,
inst,
block,
block_last_inst_idx,
cur_vreg.vreg(),
true,
) {
continue;
}
state.clear_preg(reg);
}
                        // reg should not contain anything at this point;
                        // assign_preg clears it defensively anyway
                        regs_allocated.add(reg);
                        state.allocs[alloc_idx + i] = Allocation::reg(reg);
                        state.assign_preg(reg, vreg);
state.alloc_stack_slot(vreg);
state.move_to_stack(reg, vreg, ProgPoint::after(inst));
trace!("Chose {} for operand {}", reg, i);
allocated = true;
break;
}
if allocated {
continue;
}
trace!("No free register found for {}", vreg);
// TODO: first evict pregs that already have a stack slot even if they are used earlier?
let mut evict_candidate = None;
let mut ffa_reg_pool = PRegSet::empty();
for &reg in reg_order {
if regs_allocated.contains(reg) || late_write_disallow_regs.contains(reg) {
continue;
}
debug_assert!(state.pregs[reg.index()].vreg.is_some());
let vreg = state.pregs[reg.index()].vreg.unwrap();
if let Some(next_use) = state.vreg_next_use(vreg) {
if next_use == state.cur_inst_pos as u32 {
continue;
}
if let Some((_, pos)) = &evict_candidate {
if *pos < next_use {
evict_candidate = Some((reg, next_use));
}
} else {
evict_candidate = Some((reg, next_use));
}
} else {
                            // if we hit this, all remaining candidates' uses are "before" this
                            // one in lowering order. we should find a better heuristic for
                            // choosing which register to evict; for now, collect the regs in a
                            // set and pick a random one later
ffa_reg_pool.add(reg);
}
}
                    if let Some((reg, _)) = evict_candidate {
// Save vreg if needed
{
let vreg = state.pregs[reg.index()].vreg.unwrap();
trace!("Evicting {} with {}", reg, vreg);
if state.vregs[vreg.vreg()].slot_idx.is_none()
&& !vreg_killed(
state,
inst,
block,
block_last_inst_idx,
vreg.vreg(),
true,
)
{
let slot = state.create_stack_slot(reg.class());
state.vregs[vreg.vreg()].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(reg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
}
}
state.clear_preg(reg);
regs_allocated.add(reg);
state.allocs[alloc_idx + i] = Allocation::reg(reg);
state.assign_preg(reg, vreg);
state.alloc_stack_slot(vreg);
state.move_to_stack(reg, vreg, ProgPoint::after(inst));
trace!("Chose {} for operand {}", reg, i);
} else {
if ffa_reg_pool == PRegSet::empty() {
panic!("Out of registers: {:?}", regs_allocated);
return Err(RegAllocError::TooManyLiveRegs);
}
let preg = 'block: {
let len = ffa_reg_pool.bits.count_ones() as usize;
let mut idx = (state.prng.val() as usize % 128) % len;
for preg in ffa_reg_pool.into_iter() {
if idx == 0 {
break 'block preg;
}
idx -= 1;
}
panic!("I can't do math");
};
trace!("Chose {} from ffa_reg_pool", preg);
{
let vreg = state.pregs[preg.index()].vreg.unwrap();
// need to save vreg if it does not have a slot
if state.vregs[vreg.vreg()].slot_idx.is_none() {
let slot = state.create_stack_slot(preg.class());
state.vregs[vreg.vreg()].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(preg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
}
}
state.clear_preg(preg);
state.allocs[alloc_idx + i] = Allocation::reg(preg);
regs_allocated.add(preg);
                        // this pass only handles late defs (see the guard above)
                        state.assign_preg(preg, vreg);
                        state.alloc_stack_slot(vreg);
                        state.move_to_stack(preg, vreg, ProgPoint::after(inst));
trace!("Chose {} for operand {}", preg, i);
}
}
OperandConstraint::Reuse(idx) => {
debug_assert!(state.allocs[alloc_idx + idx].is_reg());
let preg = state.allocs[alloc_idx + idx].as_reg().unwrap();
debug_assert!(regs_allocated.contains(preg));
state.allocs[alloc_idx + i] = Allocation::reg(preg);
// Save vreg on stack if it is not killed
if let Some(vreg) = state.pregs[preg.index()].vreg {
let vreg_idx = vreg.vreg();
if state.vregs[vreg_idx].slot_idx.is_none()
&& !vreg_killed(state, inst, block, block_last_inst_idx, vreg_idx, true)
{
trace!("Saving {}", vreg);
let slot = state.create_stack_slot(preg.class());
state.vregs[vreg_idx].slot_idx = Some(slot);
state.edits.push((
ProgPoint::before(inst),
Edit::Move {
from: Allocation::reg(preg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
}
}
                    // assign_preg clears the preg's previous contents itself
                    state.assign_preg(preg, vreg);
                    // note: unlike other defs, a reused def is not spilled to a stack slot here
}
_ => {
debug_assert!(!state.allocs[alloc_idx + i].is_none());
}
}
}
// fixup edit order
let mut first_post_pos = None;
for i in edit_start_idx..state.edits.len() {
debug_assert!(state.edits[i].0.inst() == inst);
match first_post_pos {
None => {
if state.edits[i].0.pos() == InstPosition::After {
first_post_pos = Some(i);
}
}
Some(pos) => {
if state.edits[i].0.pos() == InstPosition::Before {
state.edits.swap(pos, i);
first_post_pos = Some(pos + 1);
}
}
}
}
assert!(!state.allocs[alloc_idx..alloc_idx + operands.len()]
.iter()
.any(|a| a.is_none()));
trace!(
"Instruction Allocs: {:?}",
&state.allocs[alloc_idx..alloc_idx + operands.len()]
);
state.cur_inst_pos += 1;
}
    // Move all liveout/block param vregs to a stack slot if they don't have one, and clear pregs
for i in 0..state.pregs.len() {
match state.pregs[i].vreg {
None => {}
Some(vreg) => {
trace!("Clearing {} from p{}", vreg, i);
let idx = vreg.vreg();
                // TODO: obviously don't need this if the block-param handling funcs can handle reg locations
let is_out_param = 'block: {
let last_inst = state.func.block_insns(block).last();
if !state.func.is_branch(last_inst) {
break 'block false;
}
for succ_idx in 0..state.func.block_succs(block).len() {
for out_vreg in state.func.branch_blockparams(block, last_inst, succ_idx) {
if *out_vreg == vreg {
break 'block true;
}
}
}
false
};
if (is_out_param || state.liveouts[block.index()].get(idx))
&& state.vregs[idx].slot_idx.is_none()
{
let preg = state.vregs[idx].preg.unwrap();
let slot = state.create_stack_slot(preg.class());
state.edits.push((
ProgPoint::before(Inst::new(block_last_inst_idx)),
Edit::Move {
from: Allocation::reg(preg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
state.vregs[idx].slot_idx = Some(slot);
}
state.vregs[idx].preg = None;
state.pregs[i].vreg = None;
}
}
}
Ok(())
}
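/// Emits the stack-to-stack moves for the outgoing block parameters of
/// `block`'s terminating branch. If the single successor's params were
/// already placed, out-values are shuffled into the expected slots (breaking
/// dependency cycles via temporary slots); otherwise the successors' incoming
/// params are allocated here, reusing an outgoing slot where the value dies
/// on the edge.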
fn handle_out_block_params<'a, F: Function>(
state: &mut FastAllocState<'a, F>,
const_state: &ReadOnlyData,
block: Block,
) -> Result<(), RegAllocError> {
let last_inst = state.func.block_insns(block).last();
trace!(
"Allocating outgoing blockparams for {}, last_inst: {}",
block.index(),
last_inst.index()
);
if !state.func.is_branch(last_inst) {
trace!("Last inst {} is not a branch", last_inst.index());
return Ok(());
}
let mut pregs_used_by_br = PRegSet::empty();
{
let alloc_start = state.inst_alloc_offsets[last_inst.index()] as usize;
let alloc_end = if last_inst.index() + 1 == state.inst_alloc_offsets.len() {
state.allocs.len()
} else {
state.inst_alloc_offsets[last_inst.index() + 1] as usize
};
trace!("alloc_start: {}, alloc_end: {}", alloc_start, alloc_end);
for i in alloc_start..alloc_end {
            if let Some(reg) = state.allocs[i].as_reg() {
pregs_used_by_br.add(reg);
}
}
}
    // wouldn't need this if the edits for this were made before the moves for the
    // branch inst, but I think that has its own share of problems
let tmp_reg_int = 'block: {
for reg in const_state.reg_order(RegClass::Int) {
if !pregs_used_by_br.contains(*reg) {
break 'block *reg;
}
}
panic!("No usable tmp_reg for block param handling");
};
let tmp_reg_float = 'block: {
for reg in const_state.reg_order(RegClass::Float) {
if !pregs_used_by_br.contains(*reg) {
break 'block *reg;
}
}
panic!("No usable tmp_reg for block param handling");
};
let succs = state.func.block_succs(block);
if succs.len() == 1 && state.blocks[succs[0].index()].params_allocated {
trace!("Only one allocated successor, moving allocations");
let succ = succs[0];
// move values to the already allocated places
let in_params = state.func.block_params(succ);
let out_params = state.func.branch_blockparams(block, last_inst, 0);
        debug_assert_eq!(in_params.len(), out_params.len());
        if in_params.is_empty() {
            trace!("No params. Skipping");
            return Ok(());
        }
        // param indices are stored as u8 below, so cap the supported argument count
        assert!(in_params.len() < 254, "unsupported block argument length");
// TODO: this is a really dumb way to handle cycles/chains
// need a better algo
let mut depends: SmallVec<[SmallVec<[u8; 2]>; 4]> = SmallVec::new();
depends.resize(out_params.len(), SmallVec::new());
let mut depends_rev: SmallVec<[SmallVec<[u8; 2]>; 4]> = SmallVec::new();
depends_rev.resize(out_params.len(), SmallVec::new());
let mut params_left: SmallVec<[u8; 8]> = SmallVec::new();
let mut remap: SmallVec<[Option<u32>; 8]> = SmallVec::new();
remap.resize(out_params.len(), None);
for i in 0..in_params.len() {
params_left.push(i as u8);
}
        // out_slot -> in_slot
        // if an in_slot is also used as an out_slot, it may only be overwritten once
        // that out_slot has been read
for i in 0..out_params.len() {
let out_slot = state.vregs[out_params[i].vreg()].slot_idx.unwrap();
for j in 0..in_params.len() {
let in_slot = state.vregs[in_params[j].vreg()].slot_idx.unwrap();
if i == j {
continue;
}
if out_slot == in_slot {
depends[j].push(i as u8);
depends_rev[i].push(j as u8);
}
}
}
        // worked example: three params whose out->in slot moves are
        //   param 0: slot 5 -> slot 3
        //   param 1: slot 3 -> slot 6
        //   param 2: slot 6 -> slot 5
        // param 0 writes slot 3, which param 1 still has to read, so depends[0] = [1];
        // likewise depends[1] = [2] and depends[2] = [0] - a cycle, which the
        // no-progress branch below breaks via a temporary slot
while !params_left.is_empty() {
let count = params_left.len();
// Check if any non-dependent block argument can be written
let mut i = 0;
while i < params_left.len() {
let param_idx = params_left[i] as usize;
if !depends[param_idx].is_empty() {
i += 1;
continue;
}
params_left.swap_remove(i);
                // clear this param from the dependency lists of everything waiting on it
                for &depend_idx in &depends_rev[param_idx] {
                    depends[depend_idx as usize].retain(|idx| *idx as usize != param_idx);
                }
let out_vreg = out_params[param_idx];
let in_vreg = in_params[param_idx];
let out_slot = match remap[param_idx] {
Some(idx) => idx,
None => state.vregs[out_vreg.vreg()].slot_idx.unwrap(),
};
let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap();
assert_eq!(out_vreg.class(), in_vreg.class());
let tmp_reg = if out_vreg.class() == RegClass::Int {
tmp_reg_int
} else {
tmp_reg_float
};
trace!(
"Move {} from slot {} to slot {} for {}",
out_vreg,
out_slot,
in_slot,
in_vreg
);
if out_slot == in_slot {
continue;
}
state.edits.push((
ProgPoint::before(last_inst),
Edit::Move {
from: Allocation::stack(SpillSlot::new(out_slot as usize)),
to: Allocation::reg(tmp_reg),
},
));
state.edits.push((
ProgPoint::before(last_inst),
Edit::Move {
from: Allocation::reg(tmp_reg),
to: Allocation::stack(SpillSlot::new(in_slot as usize)),
},
));
}
if params_left.len() == count {
// only cycles left, break first element
let param_idx = params_left[0] as usize;
                for i in &params_left {
                    debug_assert_eq!(depends[*i as usize].len(), 1);
                }
let depend_idx = depends[param_idx][0] as usize;
// need to move the out_slot of the dependency to a temporary slot
let depend_vreg = out_params[depend_idx];
let depend_out_slot = state.vregs[depend_vreg.vreg()].slot_idx.unwrap();
let tmp_slot = state.create_stack_slot(depend_vreg.class());
let tmp_reg = if depend_vreg.class() == RegClass::Int {
tmp_reg_int
} else {
tmp_reg_float
};
trace!(
"Breaking cycle for {} by moving {} from slot {} to slot {}",
param_idx,
depend_idx,
depend_out_slot,
tmp_slot
);
state.edits.push((
ProgPoint::before(last_inst),
Edit::Move {
from: Allocation::stack(SpillSlot::new(depend_out_slot as usize)),
to: Allocation::reg(tmp_reg),
},
));
state.edits.push((
ProgPoint::before(last_inst),
Edit::Move {
from: Allocation::reg(tmp_reg),
to: Allocation::stack(SpillSlot::new(tmp_slot as usize)),
},
));
// TODO: assert!(remap[depend_idx].is_none())
remap[depend_idx] = Some(tmp_slot);
depends[param_idx].clear();
}
}
} else {
trace!("Successors not allocated. Creating allocation");
let mut allocs = SmallVec::<[(VReg, u32); 4]>::new();
// set incoming block params of successor to the current stack slot
for (i, &succ) in state.func.block_succs(block).iter().enumerate() {
trace!("Creating block {}", succ.index());
if state.blocks[succ.index()].params_allocated {
return Err(RegAllocError::CritEdge(block, succ));
}
// we allocate the params here
// TODO: can there be a problem if the same successor occurs multiple times?
state.blocks[succ.index()].params_allocated = true;
let in_params = state.func.block_params(succ);
let out_params = state.func.branch_blockparams(block, last_inst, i);
debug_assert_eq!(in_params.len(), out_params.len());
let mut vregs_passed = SmallVec::<[VReg; 4]>::new();
for i in 0..in_params.len() {
let out_vreg = out_params[i];
let in_vreg = in_params[i];
debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some());
debug_assert!(state.vregs[in_vreg.vreg()].slot_idx.is_none());
let out_slot_idx = state.vregs[out_vreg.vreg()].slot_idx.unwrap();
                if out_vreg == VReg::invalid() {
                    panic!("invalid outgoing blockparam vreg");
                }
                if in_vreg == VReg::invalid() {
                    panic!("invalid incoming blockparam vreg");
                }
state.vregs[in_vreg.vreg()].def_block = Some(succ);
// TODO: if out_vreg dies at this edge, we could reuse its stack slot
let mut no_alias = false;
if !vregs_passed.contains(&out_vreg) {
let mut alloced = false;
for alloc in &allocs {
if alloc.0 != out_vreg {
continue;
}
                        // we can reuse the stack slot the value was already copied into
                        state.vregs[in_vreg.vreg()].slot_idx = Some(alloc.1);
alloced = true;
break;
}
vregs_passed.push(out_vreg);
if alloced {
continue;
}
if !state.liveouts[block.index()].get(out_vreg.vreg()) {
let slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap();
trace!(
"{} died at the edge, reuse stack slot {} for {}",
out_vreg,
slot,
in_vreg
);
// we can reuse the stack slot since the variable dies
state.vregs[in_vreg.vreg()].slot_idx = Some(slot);
continue;
}
no_alias = true;
}
// need to duplicate to avoid aliasing or create a new stack slot
// TODO: this creates multiple duplications for multiple blocks, can be avoided
let tmp_reg = if out_vreg.class() == RegClass::Int {
tmp_reg_int
} else {
tmp_reg_float
};
let slot = state.create_stack_slot(out_vreg.class());
trace!(
"Moving {} from slot {} to slot {} for {}",
out_vreg,
out_slot_idx,
slot,
in_vreg
);
state.edits.push((
ProgPoint::before(last_inst),
Edit::Move {
from: Allocation::stack(SpillSlot::new(out_slot_idx as usize)),
to: Allocation::reg(tmp_reg),
},
));
state.edits.push((
ProgPoint::before(last_inst),
Edit::Move {
from: Allocation::reg(tmp_reg),
to: Allocation::stack(SpillSlot::new(slot as usize)),
},
));
state.vregs[in_vreg.vreg()].slot_idx = Some(slot);
if no_alias {
allocs.push((out_vreg, slot));
}
}
}
}
Ok(())
}
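/// Numbers all instructions with a single global position counter and
/// records, per vreg, the ascending positions of its uses (including uses as
/// outgoing block parameters), then computes the livein/liveout bitmaps.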
// don't inline for better perf stats
#[inline(never)]
fn calc_use_positions_and_live_bitmaps<'a, F: Function>(
state: &mut FastAllocState<'a, F>,
const_state: &ReadOnlyData,
) -> Result<(), RegAllocError> {
// TODO: this could be folded into the bitmap calculation by making a
// reverse postorder idx -> (block idx, inst_count for all)
// mapping
    //
    // we use a pseudo-counter to get a uniform position numbering for instructions.
    // this takes 0.5-0.8% of compile time on average, with peaks of up to 2%, so if
    // it does not pay for itself it should be removed
let mut cur_pos = 0u32;
    for &block in const_state.postorder.iter().rev() {
trace!("Calculating uses for block {}", block.index());
let insts = state.func.block_insns(block);
for inst in insts.clone().iter() {
let operands = state.func.inst_operands(inst);
for op in operands {
if op.kind() != OperandKind::Use {
continue;
}
if op.vreg() == VReg::invalid() {
continue;
}
trace!(
"Use of {} at {} (inst {})",
op.vreg(),
cur_pos,
inst.index()
);
state.vregs[op.vreg().vreg()].uses.push(cur_pos);
}
cur_pos += 1;
}
let last_inst = insts.last();
if !state.func.is_branch(last_inst) {
continue;
}
for i in 0..state.func.block_succs(block).len() {
for vreg in state.func.branch_blockparams(block, last_inst, i) {
trace!(
"Use of {} in blockparam at {} (inst {})",
vreg,
cur_pos,
last_inst.index()
);
state.vregs[vreg.vreg()].uses.push(cur_pos);
}
}
cur_pos += 1;
}
calc_live_bitmaps(state, const_state)
}
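/// A plain fixed-size bitset over block indices, used as the "already queued"
/// set for the liveness worklist below.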
struct BlockBitmap {
storage: SmallVec<[u64; 2]>,
}
impl BlockBitmap {
fn init(block_count: usize) -> Self {
let u64_count = (block_count + 63) / 64;
let mut storage = SmallVec::<[u64; 2]>::with_capacity(u64_count);
storage.resize(u64_count, 0);
Self { storage }
}
fn set(&mut self, idx: usize) {
let storage_idx = idx / 64;
let bit = 1u64 << (idx % 64);
self.storage[storage_idx] |= bit;
}
fn un_set(&mut self, idx: usize) {
let storage_idx = idx / 64;
let bit = 1u64 << (idx % 64);
self.storage[storage_idx] &= !bit;
}
    fn is_set(&self, idx: usize) -> bool {
let storage_idx = idx / 64;
let bit = 1u64 << (idx % 64);
(self.storage[storage_idx] & bit) != 0
}
}
// currently a copy of the liveness algorithm from liveranges.rs
// don't inline for better perf stats
// this is currently very expensive: it takes 3% of compile time, out of the
// 10.2% total spent in the register allocator
#[inline(never)]
fn calc_live_bitmaps<'a, F: Function>(
state: &mut FastAllocState<'a, F>,
const_state: &ReadOnlyData,
) -> Result<(), RegAllocError> {
state.liveins.resize(state.blocks.len(), IndexSet::new());
state.liveouts.resize(state.blocks.len(), IndexSet::new());
// Run a worklist algorithm to precisely compute liveins and
// liveouts.
let mut workqueue = VecDeque::new();
let mut workqueue_set = BlockBitmap::init(state.liveins.len());
// Initialize workqueue with postorder traversal.
for &block in &const_state.postorder[..] {
workqueue.push_back(block);
workqueue_set.set(block.index());
}
while let Some(block) = workqueue.pop_front() {
workqueue_set.un_set(block.index());
let insns = state.func.block_insns(block);
trace!("computing liveins for block{}", block.index());
let mut live = state.liveouts[block.index()].clone();
trace!(" -> initial liveout set: {:?}", live);
// Include outgoing blockparams in the initial live set.
if state.func.is_branch(insns.last()) {
for i in 0..state.func.block_succs(block).len() {
for &param in state.func.branch_blockparams(block, insns.last(), i) {
live.set(param.vreg(), true);
}
}
}
for inst in insns.rev().iter() {
            // TODO: this differs from the algo in liveranges.rs by not iterating through
            // the operand positions; in SSA this should make no difference, as no vreg
            // can be both a use and a def at a single instruction
for op in state.func.inst_operands(inst) {
if op.as_fixed_nonallocatable().is_some() {
continue;
}
let was_live = live.get(op.vreg().vreg());
trace!("op {:?} was_live = {}", op, was_live);
match op.kind() {
OperandKind::Use => {
live.set(op.vreg().vreg(), true);
}
OperandKind::Def => {
live.set(op.vreg().vreg(), false);
}
}
}
}
        // blockparams are defs at the top of the block (like phi outputs), so they
        // are not livein; clear them from the set
for &blockparam in state.func.block_params(block) {
live.set(blockparam.vreg(), false);
}
for &pred in state.func.block_preds(block) {
if state.liveouts[pred.index()].union_with(&live) {
trace!(
"liveouts of block{} changed to: {:?}",
pred.index(),
state.liveouts[pred.index()]
);
if !workqueue_set.is_set(pred.index()) {
workqueue_set.set(pred.index());
workqueue.push_back(pred);
}
}
}
trace!("computed liveins at block{}: {:?}", block.index(), live);
state.liveins[block.index()] = live;
}
// Check that there are no liveins to the entry block.
if !state.liveins[state.func.entry_block().index()].is_empty() {
trace!(
"non-empty liveins to entry block: {:?}",
state.liveins[state.func.entry_block().index()]
);
return Err(RegAllocError::EntryLivein);
}
for idx in 0..state.blocks.len() {
trace!("Livein for block {}: {:?}", idx, state.liveins[idx]);
trace!("Liveouts for block {}: {:?}", idx, state.liveouts[idx]);
}
Ok(())
}