From b6cccb7ecbf9e7f837821ce064c0f3399dda8496 Mon Sep 17 00:00:00 2001 From: T0b1 Date: Tue, 23 May 2023 13:37:23 +0200 Subject: [PATCH] WIP --- src/ion/data_structures.rs | 31 + src/ion/fast_alloc.rs | 3 +- src/ion/fast_alloc2.rs | 1201 ++++++++++++++++++++++++++++++++++++ src/ion/mod.rs | 1 + 4 files changed, 1235 insertions(+), 1 deletion(-) create mode 100644 src/ion/fast_alloc2.rs diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs index ecc903e..5702f72 100644 --- a/src/ion/data_structures.rs +++ b/src/ion/data_structures.rs @@ -686,3 +686,34 @@ pub fn u64_key(b: u32, a: u32) -> u64 { pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 { a as u128 | (b as u128) << 32 | (c as u128) << 64 | (d as u128) << 96 } + +pub struct Bitmap { + storage: SmallVec<[u64; 2]>, +} + +impl Bitmap { + pub fn init(entry_count: usize) -> Self { + let u64_count = (entry_count + 63) / 64; + let mut storage = SmallVec::<[u64; 2]>::with_capacity(u64_count); + storage.resize(u64_count, 0); + Self { storage } + } + + pub fn set(&mut self, idx: usize) { + let storage_idx = idx / 64; + let bit = 1u64 << (idx % 64); + self.storage[storage_idx] |= bit; + } + + pub fn un_set(&mut self, idx: usize) { + let storage_idx = idx / 64; + let bit = 1u64 << (idx % 64); + self.storage[storage_idx] &= !bit; + } + + pub fn is_set(&mut self, idx: usize) -> bool { + let storage_idx = idx / 64; + let bit = 1u64 << (idx % 64); + (self.storage[storage_idx] & bit) != 0 + } +} diff --git a/src/ion/fast_alloc.rs b/src/ion/fast_alloc.rs index df07acb..fe725f1 100644 --- a/src/ion/fast_alloc.rs +++ b/src/ion/fast_alloc.rs @@ -25,7 +25,7 @@ struct VRegData { // use information pub cur_use_idx: u32, - pub uses: SmallVec<[u32; 8]>, + pub uses: SmallVec<[u32; 4]>, } #[derive(Default, Clone, Copy)] @@ -38,6 +38,7 @@ struct PRegData { struct BlockData { pub reg_allocated: bool, pub params_allocated: bool, + // pub livein_locations_idx: u32, // adding this is very expensive!!! } struct ReadOnlyData { diff --git a/src/ion/fast_alloc2.rs b/src/ion/fast_alloc2.rs new file mode 100644 index 0000000..f5fbbcb --- /dev/null +++ b/src/ion/fast_alloc2.rs @@ -0,0 +1,1201 @@ +// note: for livein position tracking. i think we could use a stack since you should only need to restore the last block that had multiple successors +// need to think about how multiple successors are handled + +// also need to think of an efficient way to store the use indices +// maybe out-of-band somehow? 
but we find the uses out-of-order so that is hard + +use alloc::{collections::VecDeque, format, string::String, vec::Vec}; +use smallvec::SmallVec; + +use crate::{ + domtree, indexset::IndexSet, postorder, Allocation, Block, Edit, Function, Inst, MachineEnv, + OperandConstraint, OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, + RegClass, SpillSlot, VReg, +}; + +use super::{ + data_structures::{u64_key, Bitmap}, + Stats, +}; + +#[derive(Clone)] +struct VRegData { + preg: PReg, + stack_slot: u32, + + // 1 = reftype, 2 = preg_valid, 4 = stack_slot_valid + flags: u8, + + // use information + // TODO: need to check if it is faster to have a unified vector-backed linked list + // or just storing the first two uses of a vreg since most vregs should not have many uses + pub cur_use_idx: u8, + pub uses: SmallVec<[u32; 4]>, +} + +#[derive(Clone)] +struct PRegData { + vreg: VReg, + pub stack_pseudo: bool, + vreg_valid: bool, +} + +#[derive(Default, Clone)] +struct BlockData { + // 1 = reg_allocated, 2 = params_allocated + flags: u8, +} + +// https://burtleburtle.net/bob/rand/smallprng.html +struct PRNG { + a: u64, + b: u64, + c: u64, + d: u64, +} + +#[derive(Copy, Clone)] +struct LiveinLoc { + preg: PReg, + preg_valid: bool, + slot_valid: bool, + slot: u32, +} + +struct FastAlloc<'a, F: Function> { + pub vregs: Vec, + pub pregs: Vec, + pub blocks: Vec, + pub liveins: Vec, + pub liveouts: Vec, + + pub cur_stack_slot_idx: u32, + // we keep track how many reftype vregs are in registers so we know if any need to be spilled + pub reftype_vregs_in_pregs_count: u32, + + pub stack_slot_count_int: u8, + pub stack_slot_count_float: u8, + + pub cur_inst_pos: usize, + + pub livein_loc_lookup: Vec, + pub livein_locs: Vec, + + pub allocs: Vec, + pub inst_alloc_offsets: Vec, + pub edits: Vec<(ProgPoint, Edit)>, + pub safepoint_slots: Vec<(ProgPoint, Allocation)>, + + pub postorder: Vec, + pub idom: Vec, + + // consolidation of preferred and non-preferred registers from input + pub reg_order_int: Vec, + pub reg_order_float: Vec, + + pub func: &'a F, + pub mach_env: &'a MachineEnv, + pub prng: PRNG, +} + +impl Default for VRegData { + // TODO: only use PReg::invalid()/-1 for stack slot with debug_assertions to be able to zero initialize the array? 
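+    // The `flags` byte packs the three booleans named in the struct comment
+    // (1 = reftype, 2 = preg_valid, 4 = stack_slot_valid). As an illustration:
+    // a reftype vreg that currently lives only on the stack carries
+    // flags == 0b101 (FLAG_REFTYPE | FLAG_SLOT) while `preg` stays invalid.
+    // All-zero flags describe exactly the default state built below, which is
+    // what makes the zero-initialization idea in the TODO above plausible.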
+ fn default() -> Self { + Self { + preg: PReg::invalid(), + stack_slot: 0xFFFFFFFF, + flags: 0, + cur_use_idx: 0, + uses: SmallVec::new(), + } + } +} + +impl VRegData { + const FLAG_REFTYPE: u8 = 1; + const FLAG_PREG: u8 = 2; + const FLAG_SLOT: u8 = 4; + + pub fn preg(&self) -> Option { + if self.flags & Self::FLAG_PREG != 0 { + Some(self.preg) + } else { + None + } + } + + pub fn set_preg(&mut self, preg: PReg) { + self.preg = preg; + self.flags |= Self::FLAG_PREG; + } + + pub fn clear_preg(&mut self) { + self.flags &= !Self::FLAG_PREG; + if cfg!(debug_assertions) { + self.preg = PReg::invalid(); + } + } + + pub fn stack_slot(&self) -> Option { + if self.flags & Self::FLAG_SLOT != 0 { + Some(self.stack_slot) + } else { + None + } + } + + pub fn set_stack_slot(&mut self, slot: u32) { + self.stack_slot = slot; + self.flags |= Self::FLAG_SLOT; + } + + pub fn clear_stack_slot(&mut self) { + self.flags &= !Self::FLAG_SLOT; + if cfg!(debug_assertions) { + self.stack_slot = 0xFFFFFFFF; + } + } + + pub fn is_reftype(&self) -> bool { + self.flags & Self::FLAG_REFTYPE != 0 + } + + pub fn set_reftype(&mut self, reftype: bool) { + if reftype { + self.flags |= Self::FLAG_REFTYPE; + } else { + self.flags &= !Self::FLAG_REFTYPE; + } + } +} + +impl Default for PRegData { + // TODO: only use VReg::invalid() with debug_assertions to be able to zero initialize the array? + fn default() -> Self { + Self { + vreg: VReg::invalid(), + stack_pseudo: false, + vreg_valid: false, + } + } +} + +impl PRegData { + pub fn vreg(&self) -> Option { + if self.vreg_valid { + Some(self.vreg) + } else { + None + } + } + + pub fn set_vreg(&mut self, vreg: VReg) { + self.vreg = vreg; + self.vreg_valid = true; + } + + pub fn clear_vreg(&mut self) { + self.vreg_valid = false; + if cfg!(debug_assertions) { + self.vreg = VReg::invalid(); + } + } +} + +impl BlockData { + const FLAG_REG: u8 = 1; + const FLAG_PARAMS: u8 = 2; + + pub fn regs_allocated(&self) -> bool { + self.flags & Self::FLAG_REG != 0 + } + + pub fn set_regs_allocated(&mut self) { + self.flags |= Self::FLAG_REG; + } + + pub fn params_allocated(&self) -> bool { + self.flags & Self::FLAG_PARAMS != 0 + } + + pub fn set_params_allocated(&mut self) { + self.flags |= Self::FLAG_PARAMS + } +} + +impl PRNG { + fn new(seed: u64) -> Self { + Self { + a: 0xf1ea5eed, + b: seed, + c: seed, + d: seed, + } + } + + fn val(&mut self) -> u64 { + let e = self.a.wrapping_sub(PRNG::rot(self.b, 27)); + self.a = self.b ^ PRNG::rot(self.c, 17); + self.b = self.c.wrapping_add(self.d); + self.c = self.d.wrapping_add(e); + self.d = e.wrapping_add(self.a); + self.d + } + + fn rot(x: u64, k: u64) -> u64 { + (x << k) | (x >> (32 - k)) + } +} + +impl LiveinLoc { + fn init(preg: Option, slot: Option) -> Self { + Self { + preg: preg.unwrap_or(PReg::from_index(0)), + preg_valid: preg.is_some(), + slot_valid: slot.is_some(), + slot: slot.unwrap_or(0), + } + } + + fn preg(&self) -> Option { + if self.preg_valid { + Some(self.preg) + } else { + None + } + } + + fn slot(&self) -> Option { + if self.slot_valid { + Some(self.slot) + } else { + None + } + } +} + +impl<'a, F: Function> FastAlloc<'a, F> { + fn init(func: &'a F, mach_env: &'a MachineEnv) -> Self { + // Not calculated here: + // - liveness/use positions + // - allocation count/inst_alloc_offsets + + let (postorder, idom) = Self::postorder_idom(func); + let (reg_order_int, reg_order_float) = Self::reg_order(mach_env); + + let reftype_vregs = func.reftype_vregs(); + + let vregs = { + let mut vregs = Vec::with_capacity(func.num_vregs()); + 
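+            // resize() below fills every entry with the invalid/empty default;
+            // only the reftype flag is patched in afterwards from
+            // func.reftype_vregs().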
vregs.resize(func.num_vregs(), VRegData::default()); + + for vreg in reftype_vregs { + vregs[vreg.vreg()].set_reftype(true); + } + + vregs + }; + + let pregs = { + let mut pregs = Vec::with_capacity(PReg::NUM_INDEX); + pregs.resize(PReg::NUM_INDEX, PRegData::default()); + + for preg in &mach_env.fixed_stack_slots { + trace!("{} is a stack pseudo", preg); + pregs[preg.index()].stack_pseudo = true; + } + pregs + }; + + let blocks = { + let mut blocks = Vec::with_capacity(func.num_blocks()); + blocks.resize(func.num_blocks(), BlockData::default()); + blocks + }; + + // TODO: replace at a later date? + let prng = PRNG::new( + (blocks.len() as u64) << 48 | (vregs.len() as u64) << 32 | (func.num_insts() as u64), + ); + + let inst_alloc_offsets = { + let mut v = Vec::with_capacity(func.num_insts()); + v.resize(func.num_insts(), 0); + v + }; + + let livein_loc_lookup = { + let mut v = Vec::with_capacity(blocks.len()); + v.resize(blocks.len(), 0); + v + }; + + Self { + vregs, + pregs, + liveins: Vec::with_capacity(blocks.len()), + liveouts: Vec::with_capacity(blocks.len()), + blocks, + cur_stack_slot_idx: 0, + reftype_vregs_in_pregs_count: 0, + stack_slot_count_int: func.spillslot_size(RegClass::Int) as u8, + stack_slot_count_float: func.spillslot_size(RegClass::Float) as u8, + cur_inst_pos: 0, + livein_loc_lookup, + livein_locs: Vec::new(), + allocs: Vec::new(), + inst_alloc_offsets, + edits: Vec::new(), + safepoint_slots: Vec::new(), + postorder, + idom, + reg_order_int, + reg_order_float, + func, + mach_env, + prng, + } + } + + fn postorder_idom(func: &F) -> (Vec, Vec) { + let postorder = postorder::calculate(func.num_blocks(), func.entry_block(), |b| { + func.block_succs(b) + }); + + let idom = domtree::calculate( + func.num_blocks(), + |b| func.block_preds(b), + &postorder, + func.entry_block(), + ); + + (postorder, idom) + } + + fn reg_order(mach_env: &MachineEnv) -> (Vec, Vec) { + let reg_order_int = { + let class = RegClass::Int as usize; + let amount = mach_env.preferred_regs_by_class[class].len() + + mach_env.non_preferred_regs_by_class[class].len(); + let mut reg_order = Vec::with_capacity(amount); + reg_order.extend_from_slice(&mach_env.preferred_regs_by_class[class]); + reg_order.extend_from_slice(&mach_env.non_preferred_regs_by_class[class]); + reg_order + }; + + let reg_order_float = { + let class = RegClass::Float as usize; + let amount = mach_env.preferred_regs_by_class[class].len() + + mach_env.non_preferred_regs_by_class[class].len(); + let mut reg_order = Vec::with_capacity(amount); + reg_order.extend_from_slice(&mach_env.preferred_regs_by_class[class]); + reg_order.extend_from_slice(&mach_env.non_preferred_regs_by_class[class]); + reg_order + }; + + (reg_order_int, reg_order_float) + } + + fn first_backtrace(&mut self) -> Result<(), RegAllocError> { + // calculate: + // - livein/liveout bitmaps + // - uses + // - alloc count + + let block_to_inst_pos = { + let mut v = Vec::with_capacity(self.blocks.len()); + v.resize(self.blocks.len(), 0u32); + + let mut cur_inst_pos = 0; + for &b in self.postorder.iter().rev() { + v[b.index()] = cur_inst_pos; + cur_inst_pos += self.func.block_insns(b).len() as u32 + 1; + } + v + }; + + self.liveins.resize(self.blocks.len(), IndexSet::new()); + self.liveouts.resize(self.blocks.len(), IndexSet::new()); + + // Run a worklist algorithm to precisely compute liveins and + // liveouts. + let mut workqueue = VecDeque::new(); + let mut workqueue_set = Bitmap::init(self.liveins.len()); + // Initialize workqueue with postorder traversal. 
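+        // A sketch of the fixed point the worklist converges to (standard
+        // backward liveness, where "uses" means upward-exposed uses, i.e. uses
+        // not preceded by a def in the same block):
+        //     liveout(b) = union of livein(s) over all successors s of b
+        //     livein(b)  = uses(b) | (liveout(b) \ defs(b)), minus b's own blockparams
+        // Outgoing blockparam arguments on the terminating branch count as uses.
+        // Each iteration below rescans one block backwards from its current
+        // liveout set and re-queues predecessors whose liveout set grew; the
+        // sets only ever grow, so the loop terminates.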
+ for &block in &self.postorder[..] { + workqueue.push_back(block); + workqueue_set.set(block.index()); + } + + let mut iter_count = 0; + while let Some(block) = workqueue.pop_front() { + // only calculate uses and allocs in the first pass + let calc_only_bitmaps = iter_count >= self.postorder.len(); + iter_count += 1; + + workqueue_set.un_set(block.index()); + let insns = self.func.block_insns(block); + + trace!("computing liveins for block{}", block.index()); + + //state.stats.livein_iterations += 1; + + let mut live = self.liveouts[block.index()].clone(); + trace!(" -> initial liveout set: {:?}", live); + + let mut inst_pos = block_to_inst_pos[block.index()] + insns.len() as u32 + 1; + + // Include outgoing blockparams in the initial live set. + if self.func.is_branch(insns.last()) { + for i in 0..self.func.block_succs(block).len() { + for ¶m in self.func.branch_blockparams(block, insns.last(), i) { + live.set(param.vreg(), true); + let vreg_idx = param.vreg(); + if !calc_only_bitmaps && self.vregs[vreg_idx].uses.last() != Some(&inst_pos) + { + trace!( + "Recording use of {} in block {} at {} (outparam)", + param, + block.index(), + inst_pos + ); + self.vregs[vreg_idx].uses.push(inst_pos); + self.vregs[vreg_idx].cur_use_idx += 1; + } + } + } + } + + inst_pos -= 1; + + for inst in insns.rev().iter() { + let operands = self.func.inst_operands(inst); + if !calc_only_bitmaps { + self.inst_alloc_offsets[inst.index()] = operands.len() as u32; + } + for op in operands { + if op.as_fixed_nonallocatable().is_some() { + continue; + } + let was_live = live.get(op.vreg().vreg()); + trace!("op {:?} was_live = {}", op, was_live); + match op.kind() { + OperandKind::Use => { + live.set(op.vreg().vreg(), true); + let vreg_idx = op.vreg().vreg(); + if !calc_only_bitmaps + && self.vregs[vreg_idx].uses.last() != Some(&inst_pos) + { + trace!( + "Recording use of {} in block {} at {}", + op.vreg(), + block.index(), + inst_pos + ); + self.vregs[vreg_idx].uses.push(inst_pos); + self.vregs[vreg_idx].cur_use_idx += 1; + } + } + OperandKind::Def => { + live.set(op.vreg().vreg(), false); + } + } + } + inst_pos -= 1; + } + + debug_assert_eq!(inst_pos, block_to_inst_pos[block.index()]); + + for &blockparam in self.func.block_params(block) { + live.set(blockparam.vreg(), false); + } + + for &pred in self.func.block_preds(block) { + if self.liveouts[pred.index()].union_with(&live) { + trace!( + "liveouts of block{} changed to: {:?}", + pred.index(), + self.liveouts[pred.index()] + ); + if !workqueue_set.is_set(pred.index()) { + workqueue_set.set(pred.index()); + workqueue.push_back(pred); + } + } + } + + trace!("computed liveins at block{}: {:?}", block.index(), live); + self.liveins[block.index()] = live; + } + + // Check that there are no liveins to the entry block. 
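+        // A vreg that is live into the entry block has a use that no definition
+        // reaches (nothing defines it before the function starts), so the input
+        // is malformed and we bail out with EntryLivein rather than allocating
+        // for a possibly-undefined value.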
+ if !self.liveins[self.func.entry_block().index()].is_empty() { + trace!( + "non-empty liveins to entry block: {:?}", + self.liveins[self.func.entry_block().index()] + ); + return Err(RegAllocError::EntryLivein); + } + + for idx in 0..self.blocks.len() { + trace!("Livein for block {}: {:?}", idx, self.liveins[idx]); + trace!("Liveouts for block {}: {:?}", idx, self.liveouts[idx]); + } + + Ok(()) + } + + fn setup_allocs(&mut self) { + // - fixup alloc offsets + // - alloc allocs vec + + let mut cur_add_off = 0; + for i in 0..self.inst_alloc_offsets.len() { + let val = self.inst_alloc_offsets[i] + cur_add_off; + self.inst_alloc_offsets[i] = val; + cur_add_off = val; + } + self.allocs.resize(cur_add_off as usize, Allocation::none()); + } + + fn forward_iter(&mut self) -> Result<(), RegAllocError> { + let postorder_len = self.postorder.len(); + for i in 0..postorder_len { + let block = self.postorder[postorder_len - i - 1]; + if self.blocks[block.index()].regs_allocated() { + // TODO: this should not even be hit, no? + trace!("Block {} already allocated. Skipping", block.index()); + continue; + } + self.blocks[block.index()].set_regs_allocated(); + + if trace_enabled!() { + trace!("Allocating block {}", block.index()); + trace!("PReg State:"); + for i in 0..self.pregs.len() { + if let Some(vreg) = &self.pregs[i].vreg() { + trace!(" -> {}: {}", PReg::new(i, vreg.class()), vreg); + } + } + } + + // TODO: evaluate whether it is better to store that info per vreg or per block + + // save current livein state if we have multiple predecessors + // as the later ones need the location of variables to move arguments properly + let preds = self.func.block_preds(block); + if preds.len() > 1 { + trace!("Saving livein locations"); + debug_assert_eq!(self.livein_loc_lookup[block.index()], 0); + self.livein_loc_lookup[block.index()] = self.livein_locs.len() as u32; + for vreg in self.liveins[block.index()].iter() { + let data = &self.vregs[vreg]; + trace!( + " -> {} at {:?} and slot {:?}", + vreg, + data.preg(), + data.stack_slot() + ); + self.livein_locs + .push(LiveinLoc::init(data.preg(), data.stack_slot())); + } + } + + // load livein locations if our predecessor has multiple successors + // TODO: we only need to do this if we are not the first one + if preds.len() == 1 && self.func.block_succs(preds[0]).len() > 1 { + trace!("Restoring livein locations"); + let mut loc_idx = self.livein_loc_lookup[block.index()] as usize; + let liveins = std::mem::take(&mut self.liveins); + for vreg in liveins[block.index()].iter() { + let data = &mut self.vregs[vreg]; + let loc = self.livein_locs[loc_idx]; + trace!(" -> {} at {:?} and slot {:?}", vreg, loc.preg(), loc.slot()); + match (data.preg(), loc.preg()) { + (Some(cur_preg), Some(loc_preg)) => { + if cur_preg != loc_preg { + self.clear_preg(cur_preg.index()); + self.clear_preg(loc_preg.index()); + self.assign_preg(loc_preg, VReg::new(vreg, loc_preg.class())); + } + } + (Some(cur_preg), None) => { + self.clear_preg(cur_preg.index()); + } + (None, Some(loc_preg)) => { + self.clear_preg(loc_preg.index()); + self.assign_preg(loc_preg, VReg::new(vreg, loc_preg.class())); + } + (None, None) => {} + } + let data = &mut self.vregs[vreg]; + match loc.slot() { + Some(slot) => { + data.set_stack_slot(slot); + } + None => { + data.clear_stack_slot(); + } + } + + loc_idx += 1; + } + self.liveins = liveins; + } + + self.alloc_block_insts(block)?; + self.alloc_block_edge(block)?; + } + + todo!("") + } + + fn alloc_block_insts(&mut self, block: Block) -> Result<(), 
RegAllocError> { + let block_last_inst = self.func.block_insns(block).last().index(); + for inst in self.func.block_insns(block).iter() { + let edit_start_idx = self.edits.len(); + let clobbers = self.func.inst_clobbers(inst); + let operands = self.func.inst_operands(inst); + let req_refs_on_stack = self.func.requires_refs_on_stack(inst); + let alloc_idx = self.inst_alloc_offsets[inst.index()] as usize; + + trace!( + "Allocating Inst {} (refs_on_stack: {}, is_ret: {}, is_branch: {}, alloc_idx: {})", + inst.index(), + req_refs_on_stack, + self.func.is_ret(inst), + self.func.is_branch(inst), + alloc_idx + ); + + if trace_enabled!() { + let mut str = String::new(); + for preg in clobbers { + if str.is_empty() { + str.push_str(&format!("{}", preg)); + } else { + str.push_str(&format!(", {}", preg)); + } + } + trace!("Clobbers: {}", str); + } + + if req_refs_on_stack { + self.create_stackmap_for_reftypes(inst, block, block_last_inst); + } + + // TODO: hardcode operand patterns for ISA, e.g. Early Use 1, Early Use 2, Late Reuse(1) for x86 and Early Use 1, Early Use 2, Late Def 1 for ARM + + // we created an ordered list of operands that is divided into six blocks: + // - fixed uses + // - 'any'/stack uses + // - fixed/stack defs + // - non-fixed uses and early defs + // - non-fixed defs and reuses + // - 'any' defs + + // we handle them the following way + // first pass: fixed uses; allocate as given + // second pass: 'any'/stack uses + // - preferred in reg + // - TODO: these should come after fixed defs, no? + // third pass: fixed defs; allocate as given + // fourth pass: non-fixed uses and early defs + // - allocate in reg if it does not interfere with fixed def/use + // - spill vreg which is farthest away from being used again + // - after it, process clobbers + // fifth pass: non-fixed defs and reuses + // - sames as fourth pass + // sixth pass: 'any' defs + // - preferred in reg if there is space + + // TODO: check if this is actually faster than iterating six times and checking for the conditions + // latter is probably faster... 
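+            // Worked example of the bucketing below (operand order and register
+            // name are illustrative, not taken from any real instruction): for
+            //     0: Use v0 Reg, 1: Use v1 FixedReg(rdi), 2: Def v2 Any, 3: Def v3 Reg
+            // op_lookup ends up as [1, 0, 3, 2] with
+            //     fixed_use_end = 1, any_use_end = 1, fixed_def_end = 1,
+            //     nf_use_end = 2, nf_def_end = 3,
+            // i.e. the fixed use first, then the non-fixed use, then the
+            // non-fixed def, and the Any def last; empty buckets simply share
+            // the same end index.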
+ let mut op_lookup: SmallVec<[u8; 8]> = SmallVec::new(); + let mut fixed_use_end = 0; + let mut any_use_end = 0; + let mut fixed_def_end = 0; + let mut nf_use_end = 0; + let mut nf_def_end = 0; + + let mut fixed_use_regs = PRegSet::empty(); + let mut regs_allocated = PRegSet::empty(); + let mut late_def_disallow = PRegSet::empty(); + + for (i, op) in operands.iter().enumerate() { + trace!("Operand {}: {}", i, op); + match op.kind() { + OperandKind::Use => match op.constraint() { + OperandConstraint::FixedReg(_) => { + op_lookup.insert(fixed_use_end, i as u8); + fixed_use_end += 1; + any_use_end += 1; + fixed_def_end += 1; + nf_use_end += 1; + nf_def_end += 1; + } + OperandConstraint::Any | OperandConstraint::Stack => { + op_lookup.insert(any_use_end, i as u8); + any_use_end += 1; + fixed_def_end += 1; + nf_use_end += 1; + nf_def_end += 1; + } + OperandConstraint::Reg => { + op_lookup.insert(nf_use_end, i as u8); + nf_use_end += 1; + nf_def_end += 1; + } + _ => panic!("invalid"), + }, + OperandKind::Def => match op.constraint() { + OperandConstraint::FixedReg(_) | OperandConstraint::Stack => { + op_lookup.insert(fixed_def_end, i as u8); + fixed_def_end += 1; + nf_use_end += 1; + nf_def_end += 1; + } + OperandConstraint::Any => { + op_lookup.insert(op_lookup.len(), i as u8); + } + OperandConstraint::Reg | OperandConstraint::Reuse(_) => { + op_lookup.insert(nf_def_end, i as u8); + nf_def_end += 1; + } + }, + } + } + + let mut op_lookup_idx = 0; + trace!("First alloc pass: Fixed uses"); + while op_lookup_idx < fixed_use_end { + let op_idx = op_lookup[op_lookup_idx] as usize; + op_lookup_idx += 1; + let op = &operands[op_idx]; + debug_assert_eq!(op.kind(), OperandKind::Use); + + match op.constraint() { + OperandConstraint::FixedReg(preg) => { + if fixed_use_regs.contains(preg) { + return Err(RegAllocError::TooManyLiveRegs); + } + + self.move_vreg_to_preg( + op.vreg(), + preg, + ProgPoint::before(inst), + inst, + block, + block_last_inst, + ); + + fixed_use_regs.add(preg); + regs_allocated.add(preg); + if op.pos() == OperandPos::Late { + late_def_disallow.add(preg); + } + self.allocs[alloc_idx + op_idx] = Allocation::reg(preg); + self.vregs[op.vreg().vreg()].cur_use_idx -= 1; + trace!(" -> Allocated op {} to {}", op_idx, preg); + } + _ => unreachable!(), + } + } + + trace!("Second alloc pass: Any and stack uses"); + while op_lookup_idx < any_use_end { + let op_idx = op_lookup[op_lookup_idx] as usize; + op_lookup_idx += 1; + let op = &operands[op_idx]; + debug_assert_eq!(op.kind(), OperandKind::Use); + + let vreg = op.vreg().vreg(); + + match op.constraint() { + OperandConstraint::Any => { + match self.vregs[vreg].preg() { + Some(preg) => { + self.allocs[alloc_idx + op_idx] = Allocation::reg(preg); + regs_allocated.add(preg); + if op.pos() == OperandPos::Late { + late_def_disallow.add(preg); + } + trace!(" -> Allocated op {} to {}", op_idx, preg); + } + None => { + let slot = self.vregs[vreg].stack_slot().unwrap(); + self.allocs[alloc_idx + op_idx] = + Allocation::stack(SpillSlot::new(slot as usize)); + trace!(" -> Allocated op {} to slot {}", op_idx, slot); + } + } + self.vregs[vreg].cur_use_idx -= 1; + } + OperandConstraint::Stack => { + let slot = match self.vregs[vreg].stack_slot() { + Some(slot) => slot, + None => { + self.alloc_and_move_to_stack(op.vreg(), ProgPoint::before(inst)); + self.vregs[vreg].stack_slot().unwrap() + } + }; + self.allocs[alloc_idx + op_idx] = + Allocation::stack(SpillSlot::new(slot as usize)); + self.vregs[vreg].cur_use_idx -= 1; + trace!(" -> Allocated op {} to 
slot {}", op_idx, slot); + } + _ => unreachable!(), + } + } + + trace!("Third alloc pass: Fixed defs"); + while op_lookup_idx < fixed_def_end { + let op_idx = op_lookup[op_lookup_idx] as usize; + op_lookup_idx += 1; + let op = &operands[op_idx]; + debug_assert_eq!(op.kind(), OperandKind::Def); + + match op.constraint() { + OperandConstraint::FixedReg(preg) => { + if op.pos() == OperandPos::Early { + if regs_allocated.contains(preg) { + trace!("Early fixed def clashes with register allocated for use"); + return Err(RegAllocError::TooManyLiveRegs); + } + } else { + if late_def_disallow.contains(preg) { + trace!( + "Late fixed def clashes with register allocated for late use" + ); + return Err(RegAllocError::TooManyLiveRegs); + } + } + + self.allocate_preg_for_vreg( + preg, + op.vreg(), + ProgPoint::before(inst), + inst, + block, + block_last_inst, + ); + + if op.pos() == OperandPos::Early { + regs_allocated.add(preg); + } + late_def_disallow.add(preg); + + trace!(" -> Allocated op {} to {}", op_idx, preg); + } + _ => unreachable!(), + } + } + + trace!("Fourth alloc pass: Non-fixed uses and early defs"); + trace!("Fifth alloc pass: Non-fixed defs and reuses"); + trace!("Sixth alloc pass: Any defs"); + todo!("") + } + todo!("") + } + + fn alloc_block_edge(&mut self, block: Block) -> Result<(), RegAllocError> { + todo!("") + } + + fn run(&mut self) -> Result<(), RegAllocError> { + self.first_backtrace()?; + self.setup_allocs(); + self.blocks[self.func.entry_block().index()].set_params_allocated(); + self.forward_iter()?; + + // TODO: we can probably sort this more efficiently with domain knowledge + // we do not iterate the blocks in their index order so the order of edits might not be sorted by progpoint + // however it should be nearly sorted + self.edits.sort_by_key(|entry| entry.0); + // these might also not be sorted + //state.safepoint_slots.sort_by_key(|entry| entry.0); + self.safepoint_slots + .sort_unstable_by_key(|(progpoint, slot)| u64_key(progpoint.to_index(), slot.bits())); + + if trace_enabled!() { + trace!("End State:"); + trace!("Edits:"); + for edit in &self.edits { + match edit.1 { + Edit::Move { from, to } => { + trace!(" -> At {:?} from {} to {}", edit.0, from, to); + } + } + } + + trace!("StackMaps:"); + for entry in &self.safepoint_slots { + trace!(" -> At {:?} at {}", entry.0, entry.1); + } + } + + Ok(()) + } + + fn create_stackmap_for_reftypes(&mut self, inst: Inst, block: Block, block_last_inst: usize) { + // make sure all reftypes have a valid stackslot + self.move_reftype_to_stack(inst); + let pos = ProgPoint::before(inst); + trace!("Calculating Stackmap for {:?}", pos); + + for vreg in self.liveins[block.index()].iter() { + let data = &self.vregs[vreg]; + if !data.is_reftype() { + continue; + } + + if self.vreg_killed(vreg, inst, block, block_last_inst, true) { + continue; + } + + let slot = data.stack_slot().unwrap(); + trace!("Marking vreg {} as saved on stack at {}", vreg, slot); + self.safepoint_slots + .push((pos, Allocation::stack(SpillSlot::new(slot as usize)))); + } + } + + // Helper functions + + // Moving + + // Moves vreg to preg and while spilling any vreg alive in preg + fn move_vreg_to_preg( + &mut self, + vreg: VReg, + preg: PReg, + pos: ProgPoint, + inst: Inst, + block: Block, + block_last_inst: usize, + ) { + todo!("") + } + + // Allocates a stack slot for a vreg and moves it there + // panics if there is a stack slot already or the vreg is not in a preg + fn alloc_and_move_to_stack(&mut self, vreg: VReg, pos: ProgPoint) { + 
self.alloc_stack_slot(vreg); + self.move_to_stack(vreg, pos); + } + + // Moves a vreg to stack + // panics if there is no stack slot or the vreg is not in a preg + fn move_to_stack(&mut self, vreg: VReg, pos: ProgPoint) { + let slot = if let Some(slot) = self.vregs[vreg.vreg()].stack_slot() { + slot + } else { + panic!("Trying to move {} to stack but it has no slot", vreg); + }; + + let preg = if let Some(preg) = self.vregs[vreg.vreg()].preg() { + preg + } else { + panic!("Trying to move {} to stack but it is not in a preg", vreg); + }; + + trace!("Moving {} from {} to slot {}", vreg, preg, slot); + self.edits.push(( + pos, + Edit::Move { + from: Allocation::reg(preg), + to: Allocation::stack(SpillSlot::new(slot as usize)), + }, + )); + } + + // Allocates a stack slot for a vreg + // panics if there is a slot already + fn alloc_stack_slot(&mut self, vreg: VReg) -> u32 { + if self.vregs[vreg.vreg()].stack_slot().is_some() { + panic!( + "Trying to alloc slot for v{} but it already has a slot", + vreg + ); + } + + let slot = self.create_stack_slot(vreg.class()); + self.vregs[vreg.vreg()].set_stack_slot(slot); + trace!("Allocated slot {} for {}", slot, vreg); + slot + } + + // Creates a stack slot for a given register class + fn create_stack_slot(&mut self, class: RegClass) -> u32 { + let size = if class == RegClass::Int { + self.stack_slot_count_int + } else { + self.stack_slot_count_float + }; + let idx = self.cur_stack_slot_idx; + self.cur_stack_slot_idx += size as u32; + trace!("Created slot {} for {:?}", idx, class); + idx + } + + // State Helpers + + // assigns vreg to preg and saves any live vreg in the preg + fn allocate_preg_for_vreg( + &mut self, + preg: PReg, + vreg: VReg, + pos: ProgPoint, + inst: Inst, + block: Block, + block_last_inst: usize, + ) { + todo!("") + } + + fn assign_preg(&mut self, preg: PReg, vreg: VReg) { + if self.pregs[preg.index()].vreg().is_some() { + panic!("Trying to assign {} to {} but it is not free", vreg, preg); + } + + if self.vregs[vreg.vreg()].preg().is_some() { + panic!("Trying to assign {} to {} but vreg is in reg", vreg, preg); + } + + trace!("Assigning {} to {}", vreg, preg); + self.pregs[preg.index()].set_vreg(vreg); + self.vregs[vreg.vreg()].set_preg(preg); + if self.vregs[vreg.vreg()].is_reftype() { + self.reftype_vregs_in_pregs_count += 1; + } + } + + // Clear a preg but do not do any moving + fn clear_preg(&mut self, preg: usize) { + if let Some(vreg) = self.pregs[preg].vreg() { + trace!("Clearing p{} with {}", preg, vreg); + self.pregs[preg].clear_vreg(); + debug_assert_eq!(self.vregs[vreg.vreg()].preg().unwrap().index(), preg); + self.vregs[vreg.vreg()].clear_preg(); + if self.vregs[vreg.vreg()].is_reftype() { + self.reftype_vregs_in_pregs_count -= 1; + } + } + } + + fn vreg_killed( + &self, + vreg: usize, + inst: Inst, + block: Block, + block_last_inst: usize, + save_on_current_use: bool, + ) -> bool { + let info = &self.vregs[vreg]; + let block_after_pos = self.cur_inst_pos + (block_last_inst - inst.index()) + 2; + let cur_use_idx = info.cur_use_idx as usize; + trace!( + "Checking live-status of v{} in {:?} at inst {:?}; CurPos: {}, SaveOnCurrent: {}, Liveout {}, block_after: {}", + vreg, + block, + inst, + self.cur_inst_pos, + save_on_current_use, + self.liveouts[block.index()].get(vreg), + block_after_pos + ); + trace!( + " -> uses: {:?}. 
currently at {}", + info.uses, + info.cur_use_idx + ); + + if !self.liveouts[block.index()].get(vreg) { + if cur_use_idx == 0xFF { + trace!(" -> uses exhausted, vreg must be dead"); + return true; + } + + if cur_use_idx == 0 { + trace!( + " -> next use: {}, no use after that", + info.uses[cur_use_idx] + ); + } else { + trace!( + " -> next uses: {}, {}", + info.uses[cur_use_idx], + info.uses[cur_use_idx - 1] + ); + } + + if save_on_current_use && info.uses[cur_use_idx] == self.cur_inst_pos as u32 { + trace!(" -> use is current, must be saved. vreg is not killed"); + return false; + } + + let next_use = if info.uses[cur_use_idx] == self.cur_inst_pos as u32 { + // skip the current use + if cur_use_idx == 0 { + trace!(" -> no use after the current one, vreg is killed"); + // no use after the current one, vreg is killed + return true; + } + + info.uses[cur_use_idx - 1] + } else { + info.uses[cur_use_idx] + }; + trace!(" -> next use is {}", next_use); + + if next_use >= block_after_pos as u32 { + trace!(" -> next use not in this block, vreg is killed"); + return true; + } else { + trace!(" -> next use is in this block, vreg not killed"); + return false; + } + } + + trace!(" -> vreg is liveout"); + return false; + } + + // Misc + + /// Make sure all reftype vregs currently in pregs have a stack slot + fn move_reftype_to_stack(&mut self, inst: Inst) { + for i in 0..self.pregs.len() { + if let Some(vreg) = self.pregs[i].vreg() { + if !self.vregs[vreg.vreg()].is_reftype() { + continue; + } + + if self.vregs[vreg.vreg()].stack_slot().is_some() { + continue; + } + + self.alloc_and_move_to_stack(vreg, ProgPoint::before(inst)); + } + } + } +} + +pub fn run(func: &F, mach_env: &MachineEnv) -> Result { + if func.multi_spillslot_named_by_last_slot() { + panic!("MultiSpillslotIndexPos not supported"); + } + + let mut state = FastAlloc::init(func, mach_env); + state.run()?; + Ok(Output { + num_spillslots: state.cur_stack_slot_idx as usize, + edits: state.edits, + allocs: state.allocs, + inst_alloc_offsets: state.inst_alloc_offsets, + safepoint_slots: state.safepoint_slots, + debug_locations: Vec::new(), + stats: Stats::default(), + }) +} diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 7fdae2b..a6d8970 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -37,6 +37,7 @@ use process::*; use smallvec::smallvec; pub(crate) mod dump; mod fast_alloc; +mod fast_alloc2; pub(crate) mod moves; pub(crate) mod spill; pub(crate) mod stackmap;