diff --git a/src/ion/fast_alloc.rs b/src/ion/fast_alloc.rs index b081443..bd76b1c 100644 --- a/src/ion/fast_alloc.rs +++ b/src/ion/fast_alloc.rs @@ -11,7 +11,7 @@ use crate::{ OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot, VReg, }; -use crate::{domtree, postorder, InstPosition}; +use crate::{domtree, postorder, FxHashSet, InstPosition}; use super::data_structures::u64_key; use super::Stats; @@ -915,14 +915,228 @@ fn handle_out_block_params<'a, F: Function>( return Ok(()); } + if in_params.len() > 254 { + panic!("unsupported block argument length"); + } + // TODO: this is a really dumb way to handle cycles/chains // need a better algo - let mut depend_count: SmallVec<[u8; 8]> = SmallVec::new(); + /*let mut tmp_slots: SmallVec<[u32; 4]> = SmallVec::new(); + for i in 0..out_params.len() { + let out_vreg = out_params[i]; + let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); + + if out_vreg == VReg::invalid() { + panic!("") + } + + let tmp_slot = state.create_stack_slot(out_vreg.class()); + let tmp_reg = if out_vreg.class() == RegClass::Int { + tmp_reg_int + } else { + tmp_reg_float + }; + + tmp_slots.push(tmp_slot); + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::stack(SpillSlot::new(out_slot as usize)), + to: Allocation::reg(tmp_reg), + }, + )); + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::reg(tmp_reg), + to: Allocation::stack(SpillSlot::new(tmp_slot as usize)), + }, + )); + } + + for i in 0..out_params.len() { + let out_vreg = out_params[i]; + let in_vreg = in_params[i]; + let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap(); + + let tmp_slot = tmp_slots[i]; + let tmp_reg = if out_vreg.class() == RegClass::Int { + tmp_reg_int + } else { + tmp_reg_float + }; + + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::stack(SpillSlot::new(tmp_slot as usize)), + to: Allocation::reg(tmp_reg), + 
}, + )); + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::reg(tmp_reg), + to: Allocation::stack(SpillSlot::new(in_slot as usize)), + }, + )); + }*/ + + let mut depends: SmallVec<[SmallVec<[u8; 2]>; 4]> = SmallVec::new(); + depends.resize(out_params.len(), SmallVec::new()); + let mut depends_rev: SmallVec<[SmallVec<[u8; 2]>; 4]> = SmallVec::new(); + depends_rev.resize(out_params.len(), SmallVec::new()); let mut params_left: SmallVec<[u8; 8]> = SmallVec::new(); - //let mut remap: SmallVec<[Option<u32>; 8]> = SmallVec::new(); + let mut remap: SmallVec<[Option<u32>; 8]> = SmallVec::new(); + remap.resize(out_params.len(), None); + + for i in 0..in_params.len() { + params_left.push(i as u8); + } + + // out_slot -> in_slot + // if an in_slot is used as an out_slot, the in_slot may only be overridden once the out_slot is done + for i in 0..out_params.len() { + let out_slot = state.vregs[out_params[i].vreg()].slot_idx.unwrap(); + for j in 0..in_params.len() { + let in_slot = state.vregs[in_params[j].vreg()].slot_idx.unwrap(); + if i == j { + continue; + } + + if out_slot == in_slot { + depends[j].push(i as u8); + depends_rev[i].push(j as u8); + } + } + } + + // 5->3 + // 3->6 + // 6->5 + // depends: [1,2,0] + + // + + while !params_left.is_empty() { + let count = params_left.len(); + + // Check if any non-dependent block argument can be written + let mut i = 0; + while i < params_left.len() { + let param_idx = params_left[i] as usize; + + if !depends[param_idx].is_empty() { + i += 1; + continue; + } + + params_left.swap_remove(i); + /*for depend_idx in depends_rev[param_idx] { + depends[depend_idx].re + }*/ + for entry in &mut depends { + entry.retain(|idx| *idx as usize != param_idx); + } + + let out_vreg = out_params[param_idx]; + let in_vreg = in_params[param_idx]; + let out_slot = match remap[param_idx] { + Some(idx) => idx, + None => state.vregs[out_vreg.vreg()].slot_idx.unwrap(), + }; + let in_slot = 
state.vregs[in_vreg.vreg()].slot_idx.unwrap(); + + assert_eq!(out_vreg.class(), in_vreg.class()); + let tmp_reg = if out_vreg.class() == RegClass::Int { + tmp_reg_int + } else { + tmp_reg_float + }; + + trace!( + "Move {} from slot {} to slot {} for {}", + out_vreg, + out_slot, + in_slot, + in_vreg + ); + if out_slot == in_slot { + continue; + } + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::stack(SpillSlot::new(out_slot as usize)), + to: Allocation::reg(tmp_reg), + }, + )); + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::reg(tmp_reg), + to: Allocation::stack(SpillSlot::new(in_slot as usize)), + }, + )); + } + + if params_left.len() == count { + // only cycles left, break first element + let param_idx = params_left[0] as usize; + for i in &params_left { + assert_eq!(depends[*i as usize].len(), 1); + } + + debug_assert_eq!(depends[param_idx].len(), 1); + let depend_idx = depends[param_idx][0] as usize; + + // need to move the out_slot of the dependency to a temporary slot + let depend_vreg = out_params[depend_idx]; + let depend_out_slot = state.vregs[depend_vreg.vreg()].slot_idx.unwrap(); + + let tmp_slot = state.create_stack_slot(depend_vreg.class()); + let tmp_reg = if depend_vreg.class() == RegClass::Int { + tmp_reg_int + } else { + tmp_reg_float + }; + + trace!( + "Breaking cycle for {} by moving {} from slot {} to slot {}", + param_idx, + depend_idx, + depend_out_slot, + tmp_slot + ); + + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::stack(SpillSlot::new(depend_out_slot as usize)), + to: Allocation::reg(tmp_reg), + }, + )); + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::reg(tmp_reg), + to: Allocation::stack(SpillSlot::new(tmp_slot as usize)), + }, + )); + + // TODO: assert!(remap[depend_idx].is_none()) + remap[depend_idx] = Some(tmp_slot); + + depends[param_idx].clear(); + } + } + + /*let mut depend_count: 
SmallVec<[u8; 8]> = SmallVec::new(); + let mut params_left: SmallVec<[u8; 8]> = SmallVec::new(); + let mut remap: SmallVec<[Option<u32>; 8]> = SmallVec::new(); depend_count.resize(out_params.len(), 0); - //remap.resize(out_params.len(), None); + remap.resize(out_params.len(), None); for i in 0..in_params.len() { params_left.push(i as u8); } @@ -958,6 +1172,7 @@ fn handle_out_block_params<'a, F: Function>( for j in 0..params_left.len() { let idx = params_left[j] as usize; let in_slot = state.vregs[in_params[idx].vreg()].slot_idx.unwrap(); + // TODO: this decreases its own depend_count if in_slot == out_slot { depend_count[idx] -= 1; } @@ -974,11 +1189,11 @@ fn handle_out_block_params<'a, F: Function>( } else { tmp_reg_float }; - /*let out_slot = match remap[idx] { + let out_slot = match remap[idx] { Some(idx) => idx, None => state.vregs[out_vreg.vreg()].slot_idx.unwrap(), - };*/ - let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); + }; + //let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap(); trace!( @@ -1033,7 +1248,7 @@ fn handle_out_block_params<'a, F: Function>( }, )); // TODO: mark out_slot as free - let new_slot = state.alloc_stack_slot(out_params[idx]); + let new_slot = state.create_stack_slot(out_params[idx].class()); trace!( "Cycle detected. Breaking by allocating new slot {} for {}", new_slot, @@ -1046,6 +1261,8 @@ fn handle_out_block_params<'a, F: Function>( to: Allocation::stack(SpillSlot::new(new_slot as usize)), }, )); + debug_assert!(remap[idx].is_none()); + remap[idx] = Some(new_slot); for j in 0..params_left.len() { let in_slot = state.vregs[in_params[params_left[j] as usize].vreg()] @@ -1056,7 +1273,7 @@ fn handle_out_block_params<'a, F: Function>( } } } - } + }*/ // TODO: need to break cycles // e.g. 
@@ -1141,6 +1358,13 @@ fn handle_out_block_params<'a, F: Function>( debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some()); let out_slot_idx = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); + if out_vreg == VReg::invalid() { + panic!("") + } + if in_vreg == VReg::invalid() { + panic!("") + } + state.vregs[in_vreg.vreg()].def_block = Some(succ); // TODO: if out_vreg dies at this edge, we could reuse its stack slot @@ -1159,20 +1383,22 @@ fn handle_out_block_params<'a, F: Function>( break; } + vregs_passed.push(out_vreg); if alloced { continue; } - vregs_passed.push(out_vreg); if !state.liveouts[block.index()].get(out_vreg.vreg()) { + let slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); trace!( - "{} died at the edge, reuse stack slot for {}", + "{} died at the edge, reuse stack slot {} for {}", out_vreg, + slot, in_vreg ); + // we can reuse the stack slot since the variable dies - state.vregs[in_vreg.vreg()].slot_idx = - Some(state.vregs[out_vreg.vreg()].slot_idx.unwrap()); + state.vregs[in_vreg.vreg()].slot_idx = Some(slot); continue; } @@ -1287,21 +1513,21 @@ impl BlockBitmap { } fn set(&mut self, idx: usize) { - let idx = idx / 64; + let storage_idx = idx / 64; let bit = 1u64 << (idx % 64); - self.storage[idx] |= bit; + self.storage[storage_idx] |= bit; } fn un_set(&mut self, idx: usize) { - let idx = idx / 64; + let storage_idx = idx / 64; let bit = 1u64 << (idx % 64); - self.storage[idx] &= !bit; + self.storage[storage_idx] &= !bit; } fn is_set(&mut self, idx: usize) -> bool { - let idx = idx / 64; + let storage_idx = idx / 64; let bit = 1u64 << (idx % 64); - (self.storage[idx] & bit) != 0 + (self.storage[storage_idx] & bit) != 0 } } @@ -1346,11 +1572,7 @@ fn calc_live_bitmaps<'a, F: Function>( } } - // TODO: evaluate if this generates better code than insns.rev().iter() - let last_idx = insns.last().index(); - let len = last_idx - insns.first().index() + 1; - for inst_rev_idx in 0..len { - let inst = Inst::new(last_idx - inst_rev_idx); + for 
inst in insns.rev().iter() { // TODO: this differs from the algo in liveranges.rs by not iterating through the positions // as in SSA it should make no difference as there can be no vreg that is both a use and def at // a single instruction