use alloc::collections::VecDeque; use alloc::format; use alloc::vec::Vec; use alloc::{string::String, vec}; use smallvec::{smallvec, SmallVec}; use std::{convert::TryFrom, println}; use crate::indexset::IndexSet; use crate::{ cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint, OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot, VReg, }; use crate::{domtree, postorder, FxHashSet, InstPosition}; use super::data_structures::u64_key; use super::Stats; #[derive(Default, Clone)] struct VRegData { pub preg: Option, pub slot_idx: Option, pub def_block: Option, pub reftype: bool, // use information pub cur_use_idx: u32, pub uses: SmallVec<[u32; 8]>, } #[derive(Default, Clone, Copy)] struct PRegData { pub vreg: Option, pub stack_pseudo: bool, } #[derive(Default, Clone, Copy)] struct BlockData { pub reg_allocated: bool, pub params_allocated: bool, } struct ReadOnlyData { pub postorder: Vec, pub idom: Vec, pub reg_order_int: Vec, pub reg_order_float: Vec, } impl ReadOnlyData { pub fn init(func: &F, mach_env: &MachineEnv) -> Self { let reg_order_int = { let class = RegClass::Int as usize; let amount = mach_env.preferred_regs_by_class[class].len() + mach_env.non_preferred_regs_by_class[class].len(); let mut reg_order = Vec::with_capacity(amount); reg_order.extend_from_slice(&mach_env.preferred_regs_by_class[class]); reg_order.extend_from_slice(&mach_env.non_preferred_regs_by_class[class]); reg_order }; let reg_order_float = { let class = RegClass::Float as usize; let amount = mach_env.preferred_regs_by_class[class].len() + mach_env.non_preferred_regs_by_class[class].len(); let mut reg_order = Vec::with_capacity(amount); reg_order.extend_from_slice(&mach_env.preferred_regs_by_class[class]); reg_order.extend_from_slice(&mach_env.non_preferred_regs_by_class[class]); reg_order }; let postorder = postorder::calculate(func.num_blocks(), func.entry_block(), |b| { func.block_succs(b) }); let idom = domtree::calculate( func.num_blocks(), |b| func.block_preds(b), &postorder, func.entry_block(), ); Self { reg_order_int, reg_order_float, postorder, idom, } } pub fn reg_order(&self, class: RegClass) -> &[PReg] { match class { RegClass::Int => &self.reg_order_int, RegClass::Float => &self.reg_order_float, } } fn calc_preorder(func: &F) -> Vec { let entry = func.entry_block(); let mut ret = vec![entry]; struct State<'a> { block: Block, succs: &'a [Block], next_succ: usize, } let mut stack: SmallVec<[State; 64]> = smallvec![]; stack.push(State { block: entry, succs: func.block_succs(entry), next_succ: 0, }); while let Some(ref mut state) = stack.last_mut() { if state.next_succ >= state.succs.len() { stack.pop(); continue; } let block = state.succs[state.next_succ]; let succs = func.block_succs(block); ret.push(block); state.next_succ += 1; if state.next_succ >= state.succs.len() { stack.pop(); } if !succs.is_empty() { stack.push(State { block, succs: func.block_succs(block), next_succ: 0, }); } } ret } } // https://burtleburtle.net/bob/rand/smallprng.html struct PRNG { a: u64, b: u64, c: u64, d: u64, } impl PRNG { fn new(seed: u64) -> Self { Self { a: 0xf1ea5eed, b: seed, c: seed, d: seed, } } fn val(&mut self) -> u64 { let e = self.a - PRNG::rot(self.b, 27); self.a = self.b ^ PRNG::rot(self.c, 17); self.b = self.c + self.d; self.c = self.d + e; self.d = e + self.a; self.d } fn rot(x: u64, k: u64) -> u64 { (x << k) | (x >> (32 - k)) } } struct FastAllocState<'a, F: Function> { pub vregs: Vec, pub pregs: Vec, pub blocks: Vec, pub liveins: Vec, pub liveouts: Vec, pub cur_stack_slot_idx: u32, pub reftype_vregs_in_pregs_count: u32, pub stack_slot_count_int: u8, pub stack_slot_count_float: u8, pub cur_inst_pos: usize, pub allocs: Vec, pub inst_alloc_offsets: Vec, pub edits: Vec<(ProgPoint, Edit)>, pub safepoint_slots: Vec<(ProgPoint, Allocation)>, pub reftype_vregs: &'a [VReg], pub func: &'a F, pub mach_env: &'a MachineEnv, pub prng: PRNG, } impl<'a, F: Function> FastAllocState<'a, F> { pub fn init(func: &'a F, mach_env: &'a MachineEnv) -> Self { let reftype_vregs = func.reftype_vregs(); let vregs = { let mut vregs = Vec::with_capacity(func.num_vregs()); vregs.resize(func.num_vregs(), VRegData::default()); for vreg in reftype_vregs { vregs[vreg.vreg()].reftype = true; } vregs }; let pregs = { let mut pregs = Vec::with_capacity(PReg::NUM_INDEX); pregs.resize(PReg::NUM_INDEX, PRegData::default()); for preg in &mach_env.fixed_stack_slots { trace!("{} is a stack pseudo", preg); pregs[preg.index()].stack_pseudo = true; } pregs }; let blocks = { let mut blocks = Vec::with_capacity(func.num_blocks()); blocks.resize(func.num_blocks(), BlockData::default()); blocks }; trace!( "Num Insts: {} Num Blocks: {}", func.num_insts(), func.num_blocks() ); let mut inst_alloc_offsets = Vec::with_capacity(func.num_insts()); inst_alloc_offsets.resize(func.num_insts(), 0); // we need to create the alloc array beforehand because it needs to be sorted by inst index // which we cannot guarantee when iterating through the blocks in reverse post-order // TODO: this can be folded into any of the other iterations of the blocks by counting the operand counts for each instruction // globally and writing the op count for each inst into inst_alloc_offsets[idx] // then just iterate inst_alloc_offsets and adjust the indices let allocs = { let block_count = func.num_blocks(); let mut cur_idx = 0; for i in 0..block_count { for inst in func.block_insns(Block::new(i)).iter() { inst_alloc_offsets[inst.index()] = cur_idx as u32; cur_idx += func.inst_operands(inst).len(); } } let mut allocs = Vec::with_capacity(cur_idx); allocs.resize(cur_idx, Allocation::none()); allocs }; trace!("InstAllocOffsets: {:?}", inst_alloc_offsets); trace!("Allocs Len: {}", allocs.len()); let prng = PRNG::new( (blocks.len() as u64) << 48 | (vregs.len() as u64) << 32 | (allocs.len() as u64), ); Self { vregs, pregs, blocks, liveins: Vec::new(), liveouts: Vec::new(), cur_stack_slot_idx: 0, reftype_vregs_in_pregs_count: 0, cur_inst_pos: 0, stack_slot_count_int: u8::try_from(func.spillslot_size(RegClass::Int)) .expect("that's a big integer"), stack_slot_count_float: u8::try_from(func.spillslot_size(RegClass::Float)) .expect("that's a big float"), allocs, inst_alloc_offsets, edits: Vec::new(), safepoint_slots: Vec::new(), reftype_vregs, func, mach_env, prng, } } pub fn get_or_alloc_stack_slot(&mut self, vreg: VReg) -> u32 { if let Some(idx) = self.vregs[vreg.vreg()].slot_idx { return idx; } self.alloc_stack_slot(vreg) } pub fn alloc_stack_slot(&mut self, vreg: VReg) -> u32 { let data = &mut self.vregs[vreg.vreg()]; if data.slot_idx.is_some() { panic!( "Trying to allocate already allocated stack slot for {}", vreg ); } let size = if vreg.class() == RegClass::Int { self.stack_slot_count_int } else { self.stack_slot_count_float }; let idx = self.cur_stack_slot_idx; trace!("Allocated slot {} for {}", idx, vreg); self.cur_stack_slot_idx += size as u32; data.slot_idx = Some(idx); idx } pub fn create_stack_slot(&mut self, class: RegClass) -> u32 { let size = if class == RegClass::Int { self.stack_slot_count_int } else { self.stack_slot_count_float }; let idx = self.cur_stack_slot_idx; self.cur_stack_slot_idx += size as u32; idx } pub fn move_to_preg(&mut self, vreg: VReg, preg: PReg, pos: ProgPoint) { trace!("Move {} to {} at {:?}", vreg, preg, pos); /*if let Some(vreg) = &self.pregs[preg.index()].vreg { let vdata = &mut self.vregs[vreg.vreg() as usize]; debug_assert!(vdata.preg.is_some()); debug_assert_eq!(vdata.preg.unwrap(), preg); vdata.preg = None; }*/ if let Some(cur_preg) = &self.vregs[vreg.vreg()].preg { if *cur_preg == preg { trace!("{} already in target reg", vreg); return; } trace!("Moving directly from {} to {}", cur_preg, preg); // Do a reg->reg move self.edits.push(( pos, Edit::Move { from: Allocation::reg(*cur_preg), to: Allocation::reg(preg), }, )); // TODO: allow multiple pregs for a single vreg? let pdata = &mut self.pregs[cur_preg.index()]; debug_assert!(pdata.vreg.is_some()); debug_assert_eq!(pdata.vreg.unwrap().vreg(), vreg.vreg()); pdata.vreg = None; self.pregs[preg.index()].vreg = Some(vreg); self.vregs[vreg.vreg()].preg = Some(preg); return; } self.clear_preg(preg); let vdata = &mut self.vregs[vreg.vreg()]; let pdata = &mut self.pregs[preg.index()]; if vdata.slot_idx.is_none() { panic!("Trying to move from vreg that has no stack slot to preg"); } let slot = vdata.slot_idx.unwrap() as usize; trace!("Moving from slot {}", slot); self.edits.push(( pos, Edit::Move { from: Allocation::stack(SpillSlot::new(slot)), to: Allocation::reg(preg), }, )); vdata.preg = Some(preg); pdata.vreg = Some(vreg); if vdata.reftype { self.reftype_vregs_in_pregs_count += 1; } } pub fn move_to_stack(&mut self, preg: PReg, vreg: VReg, pos: ProgPoint) { trace!("Move {} of {} to stack at {:?}", preg, vreg, pos); let vdata = &mut self.vregs[vreg.vreg()]; let pdata = &mut self.pregs[preg.index()]; if pdata.vreg.is_none() || vdata.preg.is_none() { panic!("Trying to move from unallocated preg/vreg to stack"); } debug_assert_eq!(vdata.preg.unwrap(), preg); debug_assert_eq!(pdata.vreg.unwrap().vreg(), vreg.vreg()); if vdata.slot_idx.is_none() { panic!("Trying to move to vreg without stack slot"); } self.edits.push(( pos, Edit::Move { from: Allocation::reg(preg), to: Allocation::stack(SpillSlot::new(vdata.slot_idx.unwrap() as usize)), }, )); } pub fn assign_preg(&mut self, preg: PReg, vreg: VReg) { trace!("Assigning {} to {}", vreg, preg); // TODO: somewhere assign_preg is called without making sure the vreg is clear (or inspite of it) // need to make sure this is intended behavior self.clear_preg(preg); self.pregs[preg.index()].vreg = Some(vreg); self.vregs[vreg.vreg()].preg = Some(preg); if self.vregs[vreg.vreg()].reftype { self.reftype_vregs_in_pregs_count += 1; } } pub fn clear_preg(&mut self, preg: PReg) { self.clear_preg_idx(preg.index()); } fn clear_preg_idx(&mut self, preg: usize) { trace!("Clearing preg {}", preg); let pdata = &mut self.pregs[preg]; if let Some(vreg) = pdata.vreg { let vdata = &mut self.vregs[vreg.vreg()]; debug_assert_eq!(vdata.preg.unwrap().index(), preg); vdata.preg = None; pdata.vreg = None; if vdata.reftype { self.reftype_vregs_in_pregs_count -= 1; } } } pub fn clear_vreg_from_reg(&mut self, vreg: VReg) { trace!("Clearing vreg {} from reg", vreg); let vdata = &mut self.vregs[vreg.vreg()]; if let Some(preg) = vdata.preg { debug_assert!(self.pregs[preg.index()].vreg.is_some()); debug_assert_eq!(self.pregs[preg.index()].vreg.unwrap().vreg(), vreg.vreg()); self.pregs[preg.index()].vreg = None; vdata.preg = None; if vdata.reftype { self.reftype_vregs_in_pregs_count -= 1; } } } pub fn clear_reftype_vregs(&mut self) { if self.reftype_vregs_in_pregs_count == 0 { return; } for i in 0..self.pregs.len() { if let Some(vreg) = self.pregs[i].vreg.clone() { let vreg = vreg.vreg(); if self.vregs[vreg].reftype { self.clear_preg_idx(i); } } } } pub fn vreg_used_at_cur_inst(&mut self, vreg: VReg) -> bool { let vdata = &self.vregs[vreg.vreg()]; if vdata.cur_use_idx as usize >= vdata.uses.len() { return false; } vdata.uses[vdata.cur_use_idx as usize] == self.cur_inst_pos as u32 } pub fn vreg_next_use(&self, vreg: VReg) -> Option { let vdata = &self.vregs[vreg.vreg()]; if vdata.cur_use_idx as usize >= vdata.uses.len() { return None; } Some(vdata.uses[vdata.cur_use_idx as usize]) } } pub fn run(func: &F, mach_env: &MachineEnv) -> Result { if func.multi_spillslot_named_by_last_slot() { panic!("MultiSpillslotIndexPos not supported"); } let mut state = FastAllocState::init(func, mach_env); let const_state = ReadOnlyData::init(func, mach_env); calc_use_positions_and_live_bitmaps(&mut state, &const_state)?; state.blocks[func.entry_block().index()].params_allocated = true; let len = const_state.postorder.len(); for i in 0..len { let block = const_state.postorder[len - 1 - i]; if state.blocks[block.index()].reg_allocated { trace!("Block {} already allocated. Skipping", block.index()); continue; } state.blocks[block.index()].reg_allocated = true; trace!("Allocating block {}", block.index()); trace!("Allocated pregs:"); for i in 0..state.pregs.len() { if let Some(vreg) = &state.pregs[i].vreg { trace!("p{}: {}", i, vreg); } } allocate_block_insts(&mut state, &const_state, block)?; handle_out_block_params(&mut state, &const_state, block)?; let last_inst = state.func.block_insns(block).last(); if state.func.is_branch(last_inst) { state.cur_inst_pos += 1; } } // we do not iterate the blocks in their index order so the order of edits might not be sorted by progpoint // however it should be nearly sorted state.edits.sort_by_key(|entry| entry.0); // these might also not be sorted //state.safepoint_slots.sort_by_key(|entry| entry.0); state .safepoint_slots .sort_unstable_by_key(|(progpoint, slot)| u64_key(progpoint.to_index(), slot.bits())); if trace_enabled!() { trace!("Edits:"); for edit in &state.edits { match edit.1 { Edit::Move { from, to } => { trace!("At {:?} from {} to {}", edit.0, from, to); } } } trace!("StackMaps:"); for entry in &state.safepoint_slots { trace!("At {:?} at {}", entry.0, entry.1); } } Ok(Output { num_spillslots: state.cur_stack_slot_idx as usize, edits: state.edits, allocs: state.allocs, inst_alloc_offsets: state.inst_alloc_offsets, safepoint_slots: state.safepoint_slots, debug_locations: Vec::new(), stats: Stats::default(), }) } fn vreg_killed<'a, F: Function>( state: &FastAllocState<'a, F>, inst: Inst, block: Block, block_last_inst: usize, vreg: usize, // if the vreg is used at the current instruction, count it as not killed // TODO: this is currently always true but can be used for optimization later on save_on_current_use: bool, ) -> bool { let info = &state.vregs[vreg]; let block_after_pos = state.cur_inst_pos + (block_last_inst - inst.index()) + 1; let cur_use_idx = info.cur_use_idx as usize; let cur_pos = if save_on_current_use { state.cur_inst_pos } else { state.cur_inst_pos + 1 }; trace!( "Checking live-status of v{} in {:?} at inst {:?} (CurPos: {} SaveCurPos? {}): Liveout {}, block_after: {}", vreg, block, inst, state.cur_inst_pos, save_on_current_use, state.liveouts[block.index()].get(vreg), block_after_pos ); trace!( "Uses of v{}: {:?}. Currently at {}", vreg, info.uses, info.cur_use_idx ); if !state.liveouts[block.index()].get(vreg) { if info.uses.len() <= cur_use_idx { trace!("Uses exhausted, vreg must be dead"); return true; } if info.uses.len() <= cur_use_idx + 1 { trace!("next use: {}, no use after that", info.uses[cur_use_idx]); } else { trace!( "next use: {}, {}", info.uses[cur_use_idx], info.uses[cur_use_idx + 1] ); } if !save_on_current_use && info.uses[cur_use_idx] == state.cur_inst_pos as u32 { if info.uses.len() <= cur_use_idx + 1 || info.uses[cur_use_idx + 1] >= block_after_pos as u32 { trace!("v{} is killed", vreg); return true; } } } return false; } fn allocate_block_insts<'a, F: Function>( state: &mut FastAllocState<'a, F>, const_state: &ReadOnlyData, block: Block, ) -> Result<(), RegAllocError> { let block_last_inst_idx = state.func.block_insns(block).last().index(); for inst in state.func.block_insns(block).iter() { let edit_start_idx = state.edits.len(); let clobbers = state.func.inst_clobbers(inst); let operands = state.func.inst_operands(inst); let req_refs_on_stack = state.func.requires_refs_on_stack(inst); let alloc_idx = state.inst_alloc_offsets[inst.index()] as usize; trace!( "Allocating Inst {} (refs_on_stack: {}, is_ret: {}, is_branch: {}, alloc_idx: {})", inst.index(), req_refs_on_stack, state.func.is_ret(inst), state.func.is_branch(inst), alloc_idx ); if trace_enabled!() { let mut str = String::new(); for preg in clobbers { if str.is_empty() { str.push_str(&format!("{}", preg)); } else { str.push_str(&format!(", {}", preg)); } } trace!("Clobbers: {}", str); } // keep track of which pregs where allocated so we can clear them later on // TODO: wouldnt need this if we look up the inst a vreg was allocated at let mut regs_allocated = PRegSet::empty(); // keep track of which pregs hold late uses/early writes and so are unelligible // as destinations for late writes let mut late_write_disallow_regs = PRegSet::empty(); // we need to keep track of late defs allocated during the fixed reg stage // as they may not overlap with late uses and there is no order guarantee for inst_operands let mut late_write_regs = PRegSet::empty(); if req_refs_on_stack { state.clear_reftype_vregs(); let pos = ProgPoint::before(inst); trace!("Calculating Stackmap for {:?}", pos); for vreg in state.reftype_vregs { let data = &state.vregs[vreg.vreg()]; if let Some(slot) = data.slot_idx { if domtree::dominates(&const_state.idom, data.def_block.unwrap(), block) { trace!("Marking vreg {} as saved on stack at {}", vreg, slot); state .safepoint_slots .push((pos, Allocation::stack(SpillSlot::new(slot as usize)))); } else { trace!("Skipping {} as it does not dominate", vreg); } } } } for preg in clobbers { // TODO: this might save a use that is killed at this inst i think let vreg = if let Some(vreg) = &state.pregs[preg.index()].vreg { *vreg } else { continue; }; if state.vregs[vreg.vreg()].slot_idx.is_some() { trace!("{} with {} clobbered but saved on stack", preg, vreg); state.clear_preg(preg); continue; } if !state.vreg_used_at_cur_inst(vreg) && vreg_killed(state, inst, block, block_last_inst_idx, vreg.vreg(), true) { trace!("{} with {} clobbered but vreg killed", preg, vreg); state.clear_preg(preg); continue; } state.alloc_stack_slot(vreg); state.move_to_stack(preg, vreg, ProgPoint::before(inst)); state.clear_preg(preg); } // we allocate fixed defs/uses and stack allocations first // TODO: if a fixed def is before a fixed use with the same preg here, it will incorrectly update state!!! trace!("First alloc pass"); for (i, op) in operands.iter().enumerate() { let vreg = op.vreg(); trace!("Operand {}: {}", i, op); if op.as_fixed_nonallocatable().is_some() { // it seems cranelift emits fixed reg uses with invalid vregs, handle them here // TODO: treat them like normal vregs by just using last_vreg_index+1 for them? match op.constraint() { OperandConstraint::FixedReg(reg) => { // Save vreg if needed if let Some(vreg) = state.pregs[reg.index()].vreg { let vreg_idx = vreg.vreg(); if state.vregs[vreg_idx].slot_idx.is_none() && (state.vreg_used_at_cur_inst(vreg) || !vreg_killed( state, inst, block, block_last_inst_idx, vreg_idx, true, )) { let slot = state.create_stack_slot(reg.class()); state.vregs[vreg_idx].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(reg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); } } state.clear_preg(reg); regs_allocated.add(reg); state.allocs[alloc_idx + i] = Allocation::reg(reg); trace!("Chose {} for operand {}", reg, i); late_write_disallow_regs.add(reg); } _ => { panic!( "Invalid op constraint {:?} for invalid vreg", op.constraint() ); } } continue; } match op.constraint() { OperandConstraint::FixedReg(reg) => { match op.kind() { OperandKind::Use => { if req_refs_on_stack && state.vregs[vreg.vreg()].reftype { panic!("reftype has fixed use when its required to be on stack"); return Err(RegAllocError::TooManyLiveRegs); } // TODO: make this proper if regs_allocated.contains(reg) { // if the reg was allocated by another early use/write or late use // OR it is allocated and we have a late use we cannot do a correct allocation if op.pos() == OperandPos::Late || !late_write_regs.contains(reg) { panic!("fixed reg late use would overwrite another fixed reg use/early write"); return Err(RegAllocError::TooManyLiveRegs); } } // are we already in the correct reg? if let Some(cur_preg) = state.vregs[vreg.vreg()].preg { if cur_preg == reg { trace!("{} already in target {}", vreg, cur_preg); state.allocs[alloc_idx + i] = Allocation::reg(cur_preg); continue; } } // Save vreg if needed if let Some(vreg) = state.pregs[reg.index()].vreg { let vreg_idx = vreg.vreg(); if state.vregs[vreg_idx].slot_idx.is_none() && (state.vreg_used_at_cur_inst(vreg) || !vreg_killed( state, inst, block, block_last_inst_idx, vreg_idx, true, )) { let slot = state.create_stack_slot(reg.class()); state.vregs[vreg_idx].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(reg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); } } if let Some(cur_preg) = state.vregs[vreg.vreg()].preg { trace!("Move {} directly from {} to {}", vreg, cur_preg, reg); // Move from preg to preg state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(cur_preg), to: Allocation::reg(reg), }, )); debug_assert_eq!( state.pregs[cur_preg.index()].vreg.unwrap().vreg(), vreg.vreg() ); state.pregs[cur_preg.index()].vreg = None; state.assign_preg(reg, vreg); } else { state.move_to_preg(vreg, reg, ProgPoint::before(inst)); } state.allocs[alloc_idx + i] = Allocation::reg(reg); if op.pos() == OperandPos::Late { if clobbers.contains(reg) { panic!("fixed late use would be clobbered"); return Err(RegAllocError::TooManyLiveRegs); } trace!("Operand {}'s allocation may not be used by a late def", i); // late uses cannot share a register with late defs late_write_disallow_regs.add(reg); } regs_allocated.add(reg); trace!("Chose {} for operand {}", reg, i); } OperandKind::Def => { if op.pos() == OperandPos::Late { if late_write_disallow_regs.contains(reg) { panic!("fixed late def would overwrite late use/early def"); return Err(RegAllocError::TooManyLiveRegs); } late_write_regs.add(reg); } else { if state.pregs[reg.index()].vreg.is_some() || clobbers.contains(reg) { // early defs cannot share a register with anything and cannot be clobbered panic!("early def shares reg or is clobbered"); return Err(RegAllocError::TooManyLiveRegs); } trace!("Operand {}'s allocation may not be used by a late def", i); // early defs cannot share a register with late defs late_write_disallow_regs.add(reg); } // Save vreg if needed if let Some(vreg) = state.pregs[reg.index()].vreg { let vreg_idx = vreg.vreg(); if state.vregs[vreg_idx].slot_idx.is_none() && (op.pos() != OperandPos::Late && state.vreg_used_at_cur_inst(vreg) || !vreg_killed( state, inst, block, block_last_inst_idx, vreg_idx, true, )) { let slot = state.create_stack_slot(reg.class()); state.vregs[vreg_idx].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(reg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); } } state.vregs[vreg.vreg()].def_block = Some(block); state.allocs[alloc_idx + i] = Allocation::reg(reg); state.assign_preg(reg, vreg); // some pseudoops use the pseudo stack pregs as defs if state.pregs[reg.index()].stack_pseudo { // find preg to use as a tmp register let mut pregs = PRegSet::empty(); for reg in const_state.reg_order(vreg.class()) { if state.pregs[reg.index()].vreg.is_some() { continue; } pregs.add(*reg); } for op in operands { match op.constraint() { OperandConstraint::FixedReg(reg) => { if op.kind() == OperandKind::Use && op.pos() == OperandPos::Early { continue; } pregs.remove(reg); } _ => {} } } if pregs == PRegSet::empty() { panic!("No way to solve pseudo-stack preg"); } // Move from pseudoreg to tmp_reg and then to stack let tmp_reg = pregs.into_iter().next().unwrap(); if let Some(vreg) = state.pregs[tmp_reg.index()].vreg { // Save vreg if needed let vreg_idx = vreg.vreg(); if state.vregs[vreg_idx].slot_idx.is_none() && (op.pos() != OperandPos::Late && state.vreg_used_at_cur_inst(vreg) || !vreg_killed( state, inst, block, block_last_inst_idx, vreg_idx, true, )) { let slot = state.create_stack_slot(reg.class()); state.vregs[vreg_idx].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new( slot as usize, )), }, )); } state.clear_preg(tmp_reg); } state.edits.push(( ProgPoint::after(inst), Edit::Move { from: Allocation::reg(reg), to: Allocation::reg(tmp_reg), }, )); if state.pregs[reg.index()].vreg.is_some() { state.clear_preg(reg); } state.assign_preg(tmp_reg, vreg); state.move_to_stack(tmp_reg, vreg, ProgPoint::after(inst)); regs_allocated.add(tmp_reg); } else { state.alloc_stack_slot(vreg); state.move_to_stack(reg, vreg, ProgPoint::after(inst)); regs_allocated.add(reg); } trace!("Chose {} for operand {}", reg, i); } } } OperandConstraint::Stack | OperandConstraint::Any => { // we allocate Any on the stack for now match op.kind() { OperandKind::Use => { if let Some(slot) = &state.vregs[vreg.vreg()].slot_idx { state.allocs[alloc_idx + i] = Allocation::stack(SpillSlot::new(*slot as usize)); trace!("Chose slot {} for operand {}", slot, i); } else { return Err(RegAllocError::SSA(vreg, inst)); } } OperandKind::Def => { state.vregs[vreg.vreg()].def_block = Some(block); let slot = state.alloc_stack_slot(vreg); state.allocs[alloc_idx + i] = Allocation::stack(SpillSlot::new(slot as usize)); trace!("Chose slot {} for operand {}", slot, i); } } } _ => continue, } } // alloc non-fixed uses and early defs in registers trace!("Second alloc pass"); for (i, op) in operands.iter().enumerate() { if op.kind() == OperandKind::Def && op.pos() == OperandPos::Late { continue; } trace!("Operand {}: {}", i, op); let vreg = op.vreg(); if op.as_fixed_nonallocatable().is_some() { continue; } match op.constraint() { OperandConstraint::Reg => { // Are we alredy in a reg? if let Some(cur_preg) = &state.vregs[vreg.vreg()].preg { assert_eq!(op.kind(), OperandKind::Use); // Late uses need to survive the instruction if op.pos() == OperandPos::Early || !clobbers.contains(*cur_preg) { trace!("{} already in reg {}. Using that", vreg, cur_preg); state.allocs[alloc_idx + i] = Allocation::reg(*cur_preg); regs_allocated.add(*cur_preg); continue; } } // find first non-allocated register let reg_order = const_state.reg_order(op.class()); let mut allocated = false; for ® in reg_order { if regs_allocated.contains(reg) { continue; } if let Some(cur_vreg) = &state.pregs[reg.index()].vreg { // we can override the reg if the vreg was killed already if !vreg_killed( state, inst, block, block_last_inst_idx, cur_vreg.vreg(), true, ) { continue; } state.clear_preg(reg); } // reg should not contain anything debug_assert!(state.pregs[reg.index()].vreg.is_none()); if op.kind() == OperandKind::Use && op.pos() == OperandPos::Late && clobbers.contains(reg) { continue; } state.allocs[alloc_idx + i] = Allocation::reg(reg); regs_allocated.add(reg); trace!("Chose {} for operand {}", reg, i); if op.kind() == OperandKind::Use { if req_refs_on_stack && state.vregs[vreg.vreg()].reftype { panic!("reftype required to be in reg at safepoint"); return Err(RegAllocError::TooManyLiveRegs); } // need to move from stack to reg state.move_to_preg(vreg, reg, ProgPoint::before(inst)); } else { // early def state.vregs[vreg.vreg()].def_block = Some(block); state.assign_preg(reg, vreg); state.alloc_stack_slot(vreg); state.move_to_stack(reg, vreg, ProgPoint::after(inst)); } allocated = true; break; } if allocated { continue; } trace!("No free register found for operand {}", i); // No register available // TODO: first evict pregs that already have a stack slot even if they are used earlier? let mut evict_candidate = None; let mut ffa_reg_pool = PRegSet::empty(); for ® in reg_order { if regs_allocated.contains(reg) { continue; } if op.kind() == OperandKind::Use && op.pos() == OperandPos::Late && clobbers.contains(reg) { continue; } debug_assert!(state.pregs[reg.index()].vreg.is_some()); let vreg = state.pregs[reg.index()].vreg.unwrap(); if let Some(next_use) = state.vreg_next_use(vreg) { if next_use == state.cur_inst_pos as u32 { continue; } if let Some((_, pos)) = &evict_candidate { if *pos < next_use { evict_candidate = Some((reg, next_use)); } } else { evict_candidate = Some((reg, next_use)); } } else { // see further below ffa_reg_pool.add(reg); //panic!("preg should have already been chosen") } } // TODO: we need some logic to shuffle assignments around if there is a late use that needs to survive a clobber // and another reg is available but taken by an early use so it would not be an eviction_candidate if let Some((reg, next_use)) = evict_candidate { // Save vreg if needed { let vreg = state.pregs[reg.index()].vreg.unwrap(); trace!("Evicting {} with v{}", reg, vreg); if state.vregs[vreg.vreg()].slot_idx.is_none() && !vreg_killed( state, inst, block, block_last_inst_idx, vreg.vreg(), true, ) { let slot = state.create_stack_slot(reg.class()); state.vregs[vreg.vreg()].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(reg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); } } state.clear_preg(reg); state.allocs[alloc_idx + i] = Allocation::reg(reg); regs_allocated.add(reg); if op.kind() == OperandKind::Use { if req_refs_on_stack && state.vregs[vreg.vreg()].reftype { panic!("reftype required to be in reg at safepoint"); return Err(RegAllocError::TooManyLiveRegs); } // need to move from stack to reg state.move_to_preg(vreg, reg, ProgPoint::before(inst)); } else { // early def state.vregs[vreg.vreg()].def_block = Some(block); state.assign_preg(reg, vreg); state.alloc_stack_slot(vreg); state.move_to_stack(reg, vreg, ProgPoint::after(inst)); } trace!("Chose {} for operand {}", reg, i); } else { if ffa_reg_pool == PRegSet::empty() { panic!("Out of registers: {:?}", regs_allocated); return Err(RegAllocError::TooManyLiveRegs); } let preg = 'block: { let len = ffa_reg_pool.bits.count_ones() as usize; let mut idx = (state.prng.val() as usize % 128) % len; for preg in ffa_reg_pool.into_iter() { if idx == 0 { break 'block preg; } idx -= 1; } panic!("I can't do math"); }; trace!("Chose {} from ffa_reg_pool", preg); { let vreg = state.pregs[preg.index()].vreg.unwrap(); // need to save vreg if it does not have a slot if state.vregs[vreg.vreg()].slot_idx.is_none() { let slot = state.create_stack_slot(preg.class()); state.vregs[vreg.vreg()].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(preg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); } } state.clear_preg(preg); state.allocs[alloc_idx + i] = Allocation::reg(preg); regs_allocated.add(preg); if op.kind() == OperandKind::Use { if req_refs_on_stack && state.vregs[vreg.vreg()].reftype { panic!("reftype required to be in reg at safepoint"); return Err(RegAllocError::TooManyLiveRegs); } // need to move from stack to reg state.move_to_preg(vreg, preg, ProgPoint::before(inst)); } else { // early def state.vregs[vreg.vreg()].def_block = Some(block); state.assign_preg(preg, vreg); state.alloc_stack_slot(vreg); state.move_to_stack(preg, vreg, ProgPoint::after(inst)); } trace!("Chose {} for operand {}", preg, i); } } OperandConstraint::Reuse(_) => { panic!("Illegal register constraint reuse for early def or use"); } _ => {} } } // advance use_idx for op in operands { if op.kind() != OperandKind::Use || op.as_fixed_nonallocatable().is_some() { continue; } let vreg_idx = op.vreg().vreg(); let info = &mut state.vregs[vreg_idx]; info.cur_use_idx += 1; if vreg_killed(state, inst, block, block_last_inst_idx, vreg_idx, true) { // TODO: clear stack slot state.clear_vreg_from_reg(op.vreg()); } /*let block_after_pos = state.cur_inst_pos + (block_last_inst_idx - inst.index()) + 1; // check if vreg dies here if !state.liveouts[block.index()].get(vreg_idx) && (info.uses.len() <= info.cur_use_idx as usize || info.uses[info.cur_use_idx as usize] > block_after_pos as u32) { // TODO: clear stack slot state.clear_vreg_from_reg(op.vreg()); }*/ } // TODO: this is currently a fix for the register state for uses that are clobbered as it is incorrectly set // but this is inefficient as we could check for this when handling uses trace!("Late clobber handling"); for preg in clobbers { // TODO: this might save a use that is killed at this inst i think let vreg = if let Some(vreg) = &state.pregs[preg.index()].vreg { *vreg } else { continue; }; if state.vregs[vreg.vreg()].slot_idx.is_some() { trace!("{} with {} clobbered but saved on stack", preg, vreg); state.clear_preg(preg); continue; } // we don't care if the reg is used at the current inst if vreg_killed(state, inst, block, block_last_inst_idx, vreg.vreg(), true) { trace!("{} with {} clobbered but vreg killed", preg, vreg); state.clear_preg(preg); continue; } // TODO: this should not be hit i think as all we should be clearing here are use assignments and the vregs // that need to be saved should have been saved at the check before state.alloc_stack_slot(vreg); state.move_to_stack(preg, vreg, ProgPoint::before(inst)); state.clear_preg(preg); } // alloc non-fixed late defs and reuse trace!("Third alloc pass"); for (i, op) in operands.iter().enumerate() { if op.kind() != OperandKind::Def || op.pos() != OperandPos::Late { continue; } trace!("Operand {}: {}", i, op); let vreg = op.vreg(); if op.as_fixed_nonallocatable().is_some() { continue; } state.vregs[vreg.vreg()].def_block = Some(block); match op.constraint() { OperandConstraint::Reg => { // find first non-allocated register let reg_order = const_state.reg_order(op.class()); let mut allocated = false; for ® in reg_order { if regs_allocated.contains(reg) || late_write_disallow_regs.contains(reg) { continue; } if let Some(cur_vreg) = &state.pregs[reg.index()].vreg { // we can override the reg if the vreg was killed already if !vreg_killed( state, inst, block, block_last_inst_idx, cur_vreg.vreg(), true, ) { continue; } state.clear_preg(reg); } // reg should not contain anything regs_allocated.add(reg); state.allocs[alloc_idx + i] = Allocation::reg(reg); state.clear_preg(reg); state.assign_preg(reg, vreg); state.alloc_stack_slot(vreg); state.move_to_stack(reg, vreg, ProgPoint::after(inst)); trace!("Chose {} for operand {}", reg, i); allocated = true; break; } if allocated { continue; } trace!("No free register found for {}", vreg); // TODO: first evict pregs that already have a stack slot even if they are used earlier? let mut evict_candidate = None; let mut ffa_reg_pool = PRegSet::empty(); for ® in reg_order { if regs_allocated.contains(reg) || late_write_disallow_regs.contains(reg) { continue; } debug_assert!(state.pregs[reg.index()].vreg.is_some()); let vreg = state.pregs[reg.index()].vreg.unwrap(); if let Some(next_use) = state.vreg_next_use(vreg) { if next_use == state.cur_inst_pos as u32 { continue; } if let Some((_, pos)) = &evict_candidate { if *pos < next_use { evict_candidate = Some((reg, next_use)); } } else { evict_candidate = Some((reg, next_use)); } } else { // if we hit this it means that all uses are "before" this one in lowering-order // we should probably find a nice heuristic for chosing which register to choose // here. tbf we should probably find an overall better heuristic for chosing which register to evict // rn just add the reg to a set and we pick a random one later ffa_reg_pool.add(reg); } } if let Some((reg, next_use)) = evict_candidate { // Save vreg if needed { let vreg = state.pregs[reg.index()].vreg.unwrap(); trace!("Evicting {} with {}", reg, vreg); if state.vregs[vreg.vreg()].slot_idx.is_none() && !vreg_killed( state, inst, block, block_last_inst_idx, vreg.vreg(), true, ) { let slot = state.create_stack_slot(reg.class()); state.vregs[vreg.vreg()].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(reg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); } } state.clear_preg(reg); regs_allocated.add(reg); state.allocs[alloc_idx + i] = Allocation::reg(reg); state.assign_preg(reg, vreg); state.alloc_stack_slot(vreg); state.move_to_stack(reg, vreg, ProgPoint::after(inst)); trace!("Chose {} for operand {}", reg, i); } else { if ffa_reg_pool == PRegSet::empty() { panic!("Out of registers: {:?}", regs_allocated); return Err(RegAllocError::TooManyLiveRegs); } let preg = 'block: { let len = ffa_reg_pool.bits.count_ones() as usize; let mut idx = (state.prng.val() as usize % 128) % len; for preg in ffa_reg_pool.into_iter() { if idx == 0 { break 'block preg; } idx -= 1; } panic!("I can't do math"); }; trace!("Chose {} from ffa_reg_pool", preg); { let vreg = state.pregs[preg.index()].vreg.unwrap(); // need to save vreg if it does not have a slot if state.vregs[vreg.vreg()].slot_idx.is_none() { let slot = state.create_stack_slot(preg.class()); state.vregs[vreg.vreg()].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(preg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); } } state.clear_preg(preg); state.allocs[alloc_idx + i] = Allocation::reg(preg); regs_allocated.add(preg); if op.kind() == OperandKind::Use { if req_refs_on_stack && state.vregs[vreg.vreg()].reftype { panic!("reftype required to be in reg at safepoint"); return Err(RegAllocError::TooManyLiveRegs); } // need to move from stack to reg state.move_to_preg(vreg, preg, ProgPoint::before(inst)); } else { // early def state.vregs[vreg.vreg()].def_block = Some(block); state.assign_preg(preg, vreg); state.alloc_stack_slot(vreg); state.move_to_stack(preg, vreg, ProgPoint::after(inst)); } trace!("Chose {} for operand {}", preg, i); } } OperandConstraint::Reuse(idx) => { debug_assert!(state.allocs[alloc_idx + idx].is_reg()); let preg = state.allocs[alloc_idx + idx].as_reg().unwrap(); debug_assert!(regs_allocated.contains(preg)); state.allocs[alloc_idx + i] = Allocation::reg(preg); // Save vreg on stack if it is not killed if let Some(vreg) = state.pregs[preg.index()].vreg { let vreg_idx = vreg.vreg(); if state.vregs[vreg_idx].slot_idx.is_none() && !vreg_killed(state, inst, block, block_last_inst_idx, vreg_idx, true) { trace!("Saving {}", vreg); let slot = state.create_stack_slot(preg.class()); state.vregs[vreg_idx].slot_idx = Some(slot); state.edits.push(( ProgPoint::before(inst), Edit::Move { from: Allocation::reg(preg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); } } state.clear_preg(preg); state.assign_preg(preg, vreg); //state.alloc_stack_slot(vreg); //state.move_to_stack(preg, vreg, ProgPoint::after(inst)); } _ => { debug_assert!(!state.allocs[alloc_idx + i].is_none()); } } } // fixup edit order let mut first_post_pos = None; for i in edit_start_idx..state.edits.len() { debug_assert!(state.edits[i].0.inst() == inst); match first_post_pos { None => { if state.edits[i].0.pos() == InstPosition::After { first_post_pos = Some(i); } } Some(pos) => { if state.edits[i].0.pos() == InstPosition::Before { state.edits.swap(pos, i); first_post_pos = Some(pos + 1); } } } } assert!(!state.allocs[alloc_idx..alloc_idx + operands.len()] .iter() .any(|a| a.is_none())); trace!( "Instruction Allocs: {:?}", &state.allocs[alloc_idx..alloc_idx + operands.len()] ); state.cur_inst_pos += 1; } // Move all liveout/block param vregs to a stack slot if they dont have one and clear pregs for i in 0..state.pregs.len() { match state.pregs[i].vreg { None => {} Some(vreg) => { trace!("Clearing {} from p{}", vreg, i); let idx = vreg.vreg(); // TODO: obv dont need that if the block param handle funcs can handle reg locations let is_out_param = 'block: { let last_inst = state.func.block_insns(block).last(); if !state.func.is_branch(last_inst) { break 'block false; } for succ_idx in 0..state.func.block_succs(block).len() { for out_vreg in state.func.branch_blockparams(block, last_inst, succ_idx) { if *out_vreg == vreg { break 'block true; } } } false }; if (is_out_param || state.liveouts[block.index()].get(idx)) && state.vregs[idx].slot_idx.is_none() { let preg = state.vregs[idx].preg.unwrap(); let slot = state.create_stack_slot(preg.class()); state.edits.push(( ProgPoint::before(Inst::new(block_last_inst_idx)), Edit::Move { from: Allocation::reg(preg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); state.vregs[idx].slot_idx = Some(slot); } state.vregs[idx].preg = None; state.pregs[i].vreg = None; } } } Ok(()) } fn handle_out_block_params<'a, F: Function>( state: &mut FastAllocState<'a, F>, const_state: &ReadOnlyData, block: Block, ) -> Result<(), RegAllocError> { let last_inst = state.func.block_insns(block).last(); trace!( "Allocating outgoing blockparams for {}, last_inst: {}", block.index(), last_inst.index() ); if !state.func.is_branch(last_inst) { trace!("Last inst {} is not a branch", last_inst.index()); return Ok(()); } let mut pregs_used_by_br = PRegSet::empty(); { let alloc_start = state.inst_alloc_offsets[last_inst.index()] as usize; let alloc_end = if last_inst.index() + 1 == state.inst_alloc_offsets.len() { state.allocs.len() } else { state.inst_alloc_offsets[last_inst.index() + 1] as usize }; trace!("alloc_start: {}, alloc_end: {}", alloc_start, alloc_end); for i in alloc_start..alloc_end { if let Some(reg) = state.allocs[i].clone().as_reg() { pregs_used_by_br.add(reg); } } } // wouldn't need this if the edits for this were made before the moves for the branch inst but that has its own share of problems i think let tmp_reg_int = 'block: { for reg in const_state.reg_order(RegClass::Int) { if !pregs_used_by_br.contains(*reg) { break 'block *reg; } } panic!("No usable tmp_reg for block param handling"); }; let tmp_reg_float = 'block: { for reg in const_state.reg_order(RegClass::Float) { if !pregs_used_by_br.contains(*reg) { break 'block *reg; } } panic!("No usable tmp_reg for block param handling"); }; let succs = state.func.block_succs(block); if succs.len() == 1 && state.blocks[succs[0].index()].params_allocated { trace!("Only one allocated successor, moving allocations"); let succ = succs[0]; // move values to the already allocated places let in_params = state.func.block_params(succ); let out_params = state.func.branch_blockparams(block, last_inst, 0); debug_assert_eq!(in_params.len(), out_params.len()); assert!(in_params.len() < 254); if in_params.is_empty() { trace!("No params. Skipping"); return Ok(()); } if in_params.len() > 254 { panic!("unsupported block argument length"); } // TODO: this is a really dumb way to handle cycles/chains // need a better algo /*let mut tmp_slots: SmallVec<[u32; 4]> = SmallVec::new(); for i in 0..out_params.len() { let out_vreg = out_params[i]; let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); if out_vreg == VReg::invalid() { panic!("") } let tmp_slot = state.create_stack_slot(out_vreg.class()); let tmp_reg = if out_vreg.class() == RegClass::Int { tmp_reg_int } else { tmp_reg_float }; tmp_slots.push(tmp_slot); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::stack(SpillSlot::new(out_slot as usize)), to: Allocation::reg(tmp_reg), }, )); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(tmp_slot as usize)), }, )); } for i in 0..out_params.len() { let out_vreg = out_params[i]; let in_vreg = in_params[i]; let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap(); let tmp_slot = tmp_slots[i]; let tmp_reg = if out_vreg.class() == RegClass::Int { tmp_reg_int } else { tmp_reg_float }; state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::stack(SpillSlot::new(tmp_slot as usize)), to: Allocation::reg(tmp_reg), }, )); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(in_slot as usize)), }, )); }*/ let mut depends: SmallVec<[SmallVec<[u8; 2]>; 4]> = SmallVec::new(); depends.resize(out_params.len(), SmallVec::new()); let mut depends_rev: SmallVec<[SmallVec<[u8; 2]>; 4]> = SmallVec::new(); depends_rev.resize(out_params.len(), SmallVec::new()); let mut params_left: SmallVec<[u8; 8]> = SmallVec::new(); let mut remap: SmallVec<[Option; 8]> = SmallVec::new(); remap.resize(out_params.len(), None); for i in 0..in_params.len() { params_left.push(i as u8); } // out_slot -> in_slot // if an in_slot is used as an out_slot, the in_slot may only be override once the out_slot is done for i in 0..out_params.len() { let out_slot = state.vregs[out_params[i].vreg()].slot_idx.unwrap(); for j in 0..in_params.len() { let in_slot = state.vregs[in_params[j].vreg()].slot_idx.unwrap(); if i == j { continue; } if out_slot == in_slot { depends[j].push(i as u8); depends_rev[i].push(j as u8); } } } // 5->3 // 3->6 // 6->5 // depends: [1,2,0] // while !params_left.is_empty() { let count = params_left.len(); // Check if any non-dependent block argument can be written let mut i = 0; while i < params_left.len() { let param_idx = params_left[i] as usize; if !depends[param_idx].is_empty() { i += 1; continue; } params_left.swap_remove(i); /*for depend_idx in depends_rev[param_idx] { depends[depend_idx].re }*/ for entry in &mut depends { entry.retain(|idx| *idx as usize != param_idx); } let out_vreg = out_params[param_idx]; let in_vreg = in_params[param_idx]; let out_slot = match remap[param_idx] { Some(idx) => idx, None => state.vregs[out_vreg.vreg()].slot_idx.unwrap(), }; let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap(); assert_eq!(out_vreg.class(), in_vreg.class()); let tmp_reg = if out_vreg.class() == RegClass::Int { tmp_reg_int } else { tmp_reg_float }; trace!( "Move {} from slot {} to slot {} for {}", out_vreg, out_slot, in_slot, in_vreg ); if out_slot == in_slot { continue; } state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::stack(SpillSlot::new(out_slot as usize)), to: Allocation::reg(tmp_reg), }, )); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(in_slot as usize)), }, )); } if params_left.len() == count { // only cycles left, break first element let param_idx = params_left[0] as usize; for i in ¶ms_left { assert_eq!(depends[*i as usize].len(), 1); } debug_assert_eq!(depends[param_idx].len(), 1); let depend_idx = depends[param_idx][0] as usize; // need to move the out_slot of the dependency to a temporary slot let depend_vreg = out_params[depend_idx]; let depend_out_slot = state.vregs[depend_vreg.vreg()].slot_idx.unwrap(); let tmp_slot = state.create_stack_slot(depend_vreg.class()); let tmp_reg = if depend_vreg.class() == RegClass::Int { tmp_reg_int } else { tmp_reg_float }; trace!( "Breaking cycle for {} by moving {} from slot {} to slot {}", param_idx, depend_idx, depend_out_slot, tmp_slot ); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::stack(SpillSlot::new(depend_out_slot as usize)), to: Allocation::reg(tmp_reg), }, )); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(tmp_slot as usize)), }, )); // TODO: assert!(remap[depend_idx].is_none()) remap[depend_idx] = Some(tmp_slot); depends[param_idx].clear(); } } /*let mut depend_count: SmallVec<[u8; 8]> = SmallVec::new(); let mut params_left: SmallVec<[u8; 8]> = SmallVec::new(); let mut remap: SmallVec<[Option; 8]> = SmallVec::new(); depend_count.resize(out_params.len(), 0); remap.resize(out_params.len(), None); for i in 0..in_params.len() { params_left.push(i as u8); } for i in 0..out_params.len() { let out_slot = state.vregs[out_params[i].vreg()].slot_idx.unwrap(); for j in 0..in_params.len() { let in_slot = state.vregs[in_params[j].vreg()].slot_idx.unwrap(); if i == j { continue; } if out_slot == in_slot { depend_count[j] += 1; } } } while !params_left.is_empty() { let count = params_left.len(); let mut i = 0; while i < params_left.len() { let idx = params_left[i] as usize; if depend_count[idx] != 0 { i += 1; continue; } params_left.swap_remove(i); let out_slot = state.vregs[out_params[idx].vreg()].slot_idx.unwrap(); // check if any other value depends on this for j in 0..params_left.len() { let idx = params_left[j] as usize; let in_slot = state.vregs[in_params[idx].vreg()].slot_idx.unwrap(); // TODO: this decreses its own depend_count if in_slot == out_slot { depend_count[idx] -= 1; } } let in_vreg = in_params[idx]; let out_vreg = out_params[idx]; debug_assert!(state.vregs[in_vreg.vreg()].slot_idx.is_some()); debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some()); let tmp_reg = if out_vreg.class() == RegClass::Int { tmp_reg_int } else { tmp_reg_float }; let out_slot = match remap[idx] { Some(idx) => idx, None => state.vregs[out_vreg.vreg()].slot_idx.unwrap(), }; //let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap(); trace!( "Move {} from slot {} to slot {} for {}", out_vreg, out_slot, in_slot, in_vreg ); if out_slot == in_slot { continue; } state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::stack(SpillSlot::new(out_slot as usize)), to: Allocation::reg(tmp_reg), }, )); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(in_slot as usize)), }, )); } if params_left.len() == count { // everything left belongs to a cycle if cfg!(debug_assertions) { for i in 0..params_left.len() { assert_eq!(depend_count[params_left[i] as usize], 1); } } // just move the first element to a tmp pos and search which reg depends on it // TODO: we should sanity check that all regs in the cycle die after the block let idx = params_left[0] as usize; let tmp_reg = if out_params[idx].class() == RegClass::Int { tmp_reg_int } else { tmp_reg_float }; let out_slot = state.vregs[out_params[idx].vreg()].slot_idx.take().unwrap(); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::stack(SpillSlot::new(out_slot as usize)), to: Allocation::reg(tmp_reg), }, )); // TODO: mark out_slot as free let new_slot = state.create_stack_slot(out_params[idx].class()); trace!( "Cycle detected. Breaking by allocating new slot {} for {}", new_slot, out_params[idx] ); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(new_slot as usize)), }, )); debug_assert!(remap[idx].is_none()); remap[idx] = Some(new_slot); for j in 0..params_left.len() { let in_slot = state.vregs[in_params[params_left[j] as usize].vreg()] .slot_idx .unwrap(); if in_slot == out_slot { depend_count[params_left[j] as usize] -= 1; } } } }*/ // TODO: need to break cycles // e.g. // Move v144 from slot 5 to slot 2 for v135 // Move v145 from slot 6 to slot 3 for v136 // Move v146 from slot 3 to slot 4 for v137 // or // 0: 6->4 // 1: 6->3 // 2: 3->5 // 3: 5->6 // depends: [3, 3, 1, 2] // or // 3->6 // 6->3 // 2->1 // 1->4 // depends: [1, 0, 0xFF, 2] /*for i in 0..in_params.len() { let in_vreg = in_params[i]; let out_vreg = out_params[i]; debug_assert!(state.vregs[in_vreg.vreg()].slot_idx.is_some()); debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some()); let tmp_reg = if out_vreg.class() == RegClass::Int { tmp_reg_int } else { tmp_reg_float }; let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap(); trace!( "Move {} from slot {} to slot {} for {}", out_vreg, out_slot, in_slot, in_vreg ); if out_slot == in_slot { continue; } state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::stack(SpillSlot::new(out_slot as usize)), to: Allocation::reg(tmp_reg), }, )); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(in_slot as usize)), }, )); }*/ } else { trace!("Successors not allocated. Creating allocation"); let mut allocs = SmallVec::<[(VReg, u32); 4]>::new(); // set incoming block params of successor to the current stack slot for (i, &succ) in state.func.block_succs(block).iter().enumerate() { trace!("Creating block {}", succ.index()); if state.blocks[succ.index()].params_allocated { return Err(RegAllocError::CritEdge(block, succ)); } // we allocate the params here // TODO: can there be a problem if the same successor occurs multiple times? state.blocks[succ.index()].params_allocated = true; let in_params = state.func.block_params(succ); let out_params = state.func.branch_blockparams(block, last_inst, i); debug_assert_eq!(in_params.len(), out_params.len()); let mut vregs_passed = SmallVec::<[VReg; 4]>::new(); for i in 0..in_params.len() { let out_vreg = out_params[i]; let in_vreg = in_params[i]; debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some()); debug_assert!(state.vregs[in_vreg.vreg()].slot_idx.is_none()); let out_slot_idx = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); if out_vreg == VReg::invalid() { panic!("") } if in_vreg == VReg::invalid() { panic!("") } state.vregs[in_vreg.vreg()].def_block = Some(succ); // TODO: if out_vreg dies at this edge, we could reuse its stack slot let mut no_alias = false; if !vregs_passed.contains(&out_vreg) { let mut alloced = false; for alloc in &allocs { if alloc.0 != out_vreg { continue; } // we can use the already moved into stack slot state.vregs[in_vreg.vreg()].slot_idx = Some(alloc.1); vregs_passed.push(out_vreg); alloced = true; break; } vregs_passed.push(out_vreg); if alloced { continue; } if !state.liveouts[block.index()].get(out_vreg.vreg()) { let slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); trace!( "{} died at the edge, reuse stack slot {} for {}", out_vreg, slot, in_vreg ); // we can reuse the stack slot since the variable dies state.vregs[in_vreg.vreg()].slot_idx = Some(slot); continue; } no_alias = true; } // need to duplicate to avoid aliasing or create a new stack slot // TODO: this creates multiple duplications for multiple blocks, can be avoided let tmp_reg = if out_vreg.class() == RegClass::Int { tmp_reg_int } else { tmp_reg_float }; let slot = state.create_stack_slot(out_vreg.class()); trace!( "Moving {} from slot {} to slot {} for {}", out_vreg, out_slot_idx, slot, in_vreg ); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::stack(SpillSlot::new(out_slot_idx as usize)), to: Allocation::reg(tmp_reg), }, )); state.edits.push(( ProgPoint::before(last_inst), Edit::Move { from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(slot as usize)), }, )); state.vregs[in_vreg.vreg()].slot_idx = Some(slot); if no_alias { allocs.push((out_vreg, slot)); } } } } Ok(()) } // don't inline for better perf stats #[inline(never)] fn calc_use_positions_and_live_bitmaps<'a, F: Function>( state: &mut FastAllocState<'a, F>, const_state: &ReadOnlyData, ) -> Result<(), RegAllocError> { // TODO: this could be folded into the bitmap calculation by making a // reverse postorder idx -> (block idx, inst_count for all) // mapping // // we use a pseudo-counter to have a uniform position for instructions // this takes 0.5-0.8% on average but has maxes of up to 2% of compile time // so if it does not substantially increase compilation performance it should be killed let mut cur_pos = 0u32; let len = const_state.postorder.len(); for i in 0..len { let block = const_state.postorder[len - 1 - i]; trace!("Calculating uses for block {}", block.index()); let insts = state.func.block_insns(block); for inst in insts.clone().iter() { let operands = state.func.inst_operands(inst); for op in operands { if op.kind() != OperandKind::Use { continue; } if op.vreg() == VReg::invalid() { continue; } trace!( "Use of {} at {} (inst {})", op.vreg(), cur_pos, inst.index() ); state.vregs[op.vreg().vreg()].uses.push(cur_pos); } cur_pos += 1; } let last_inst = insts.last(); if !state.func.is_branch(last_inst) { continue; } for i in 0..state.func.block_succs(block).len() { for vreg in state.func.branch_blockparams(block, last_inst, i) { trace!( "Use of {} in blockparam at {} (inst {})", vreg, cur_pos, last_inst.index() ); state.vregs[vreg.vreg()].uses.push(cur_pos); } } cur_pos += 1; } calc_live_bitmaps(state, const_state) } struct BlockBitmap { storage: SmallVec<[u64; 2]>, } impl BlockBitmap { fn init(block_count: usize) -> Self { let u64_count = (block_count + 63) / 64; let mut storage = SmallVec::<[u64; 2]>::with_capacity(u64_count); storage.resize(u64_count, 0); Self { storage } } fn set(&mut self, idx: usize) { let storage_idx = idx / 64; let bit = 1u64 << (idx % 64); self.storage[storage_idx] |= bit; } fn un_set(&mut self, idx: usize) { let storage_idx = idx / 64; let bit = 1u64 << (idx % 64); self.storage[storage_idx] &= !bit; } fn is_set(&mut self, idx: usize) -> bool { let storage_idx = idx / 64; let bit = 1u64 << (idx % 64); (self.storage[storage_idx] & bit) != 0 } } // currently, copy from liveranges.rs // don't inline for better perf stats // this is currently very expensive, takes 3% of 10.2% total time for the register allocator #[inline(never)] fn calc_live_bitmaps<'a, F: Function>( state: &mut FastAllocState<'a, F>, const_state: &ReadOnlyData, ) -> Result<(), RegAllocError> { state.liveins.resize(state.blocks.len(), IndexSet::new()); state.liveouts.resize(state.blocks.len(), IndexSet::new()); // Run a worklist algorithm to precisely compute liveins and // liveouts. let mut workqueue = VecDeque::new(); let mut workqueue_set = BlockBitmap::init(state.liveins.len()); // Initialize workqueue with postorder traversal. for &block in &const_state.postorder[..] { workqueue.push_back(block); workqueue_set.set(block.index()); } while let Some(block) = workqueue.pop_front() { workqueue_set.un_set(block.index()); let insns = state.func.block_insns(block); trace!("computing liveins for block{}", block.index()); //state.stats.livein_iterations += 1; let mut live = state.liveouts[block.index()].clone(); trace!(" -> initial liveout set: {:?}", live); // Include outgoing blockparams in the initial live set. if state.func.is_branch(insns.last()) { for i in 0..state.func.block_succs(block).len() { for ¶m in state.func.branch_blockparams(block, insns.last(), i) { live.set(param.vreg(), true); } } } for inst in insns.rev().iter() { // TODO: this differs from the algo in liveranges.rs by not iterating through the positions // as in SSA it should make no difference as there can be no vreg that is both a use and def at // a single instruction for op in state.func.inst_operands(inst) { if op.as_fixed_nonallocatable().is_some() { continue; } let was_live = live.get(op.vreg().vreg()); trace!("op {:?} was_live = {}", op, was_live); match op.kind() { OperandKind::Use => { live.set(op.vreg().vreg(), true); } OperandKind::Def => { live.set(op.vreg().vreg(), false); } } } } // TODO: i dont get why blockparams are not live when going in? for &blockparam in state.func.block_params(block) { live.set(blockparam.vreg(), false); } for &pred in state.func.block_preds(block) { if state.liveouts[pred.index()].union_with(&live) { trace!( "liveouts of block{} changed to: {:?}", pred.index(), state.liveouts[pred.index()] ); if !workqueue_set.is_set(pred.index()) { workqueue_set.set(pred.index()); workqueue.push_back(pred); } } } trace!("computed liveins at block{}: {:?}", block.index(), live); state.liveins[block.index()] = live; } // Check that there are no liveins to the entry block. if !state.liveins[state.func.entry_block().index()].is_empty() { trace!( "non-empty liveins to entry block: {:?}", state.liveins[state.func.entry_block().index()] ); return Err(RegAllocError::EntryLivein); } for idx in 0..state.blocks.len() { trace!("Livein for block {}: {:?}", idx, state.liveins[idx]); trace!("Liveouts for block {}: {:?}", idx, state.liveouts[idx]); } Ok(()) }