diff --git a/src/ion/fast_alloc.rs b/src/ion/fast_alloc.rs index d20aa9c..dea22a8 100644 --- a/src/ion/fast_alloc.rs +++ b/src/ion/fast_alloc.rs @@ -931,6 +931,9 @@ fn handle_out_block_params<'a, F: Function>( in_slot, in_vreg ); + if out_slot == in_slot { + continue; + } state.edits.push(( ProgPoint::before(last_inst), Edit::Move { @@ -975,7 +978,6 @@ fn handle_out_block_params<'a, F: Function>( state.vregs[in_vreg.vreg()].def_block = Some(succ); // TODO: if out_vreg dies at this edge, we could reuse its stack slot - // TODO: we should also be able to reuse the slot if the successor only has one predecessor (us); check with AE let mut no_alias = false; if !vregs_passed.contains(&out_vreg) { let mut alloced = false; @@ -995,6 +997,14 @@ fn handle_out_block_params<'a, F: Function>( continue; } vregs_passed.push(out_vreg); + + if !state.liveouts[block.index()].get(out_vreg.vreg()) { + // we can reuse the stack slot since the variable dies + state.vregs[in_vreg.vreg()].slot_idx = + Some(state.vregs[out_vreg.vreg()].slot_idx.unwrap()); + continue; + } + no_alias = true; } @@ -1126,6 +1136,7 @@ impl BlockBitmap { // currently, copy from liveranges.rs // don't inline for better perf stats +// this is currently very expensive, takes 3% of 10.2% total time for the register allocator #[inline(never)] fn calc_live_bitmaps<'a, F: Function>( state: &mut FastAllocState<'a, F>,