* Switch duplicate loads w/ dynamic memories test to `min_size = 0`

  This test was accidentally hitting a special case in bounds-check emission: when we know statically that `offset + access_size < min_size`, some steps of the check can be skipped. This commit changes the memory's `min_size` to zero so that we are forced to emit fully general bounds checks. (A sketch of the special case follows this list.)

* Cranelift: Mark `uadd_overflow_trap` as okay for GVN

  Although this improves the code sequence in the duplicate-loads-with-dynamic-memories test, it unfortunately has no measurable effect on the Sightglass benchmarks:

  ```
  instantiation :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm
    No difference in performance.
    [34448 35607.23 37158] gvn_uadd_overflow_trap.so
    [34566 35734.05 36585] main.so

  instantiation :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm
    No difference in performance.
    [44101 60449.62 92712] gvn_uadd_overflow_trap.so
    [44011 60436.37 92690] main.so

  instantiation :: instructions-retired :: benchmarks/bz2/benchmark.wasm
    No difference in performance.
    [35595 36675.72 38153] gvn_uadd_overflow_trap.so
    [35440 36670.42 37993] main.so

  compilation :: instructions-retired :: benchmarks/bz2/benchmark.wasm
    No difference in performance.
    [17370195 17405125.62 17471222] gvn_uadd_overflow_trap.so
    [17369324 17404859.43 17470725] main.so

  execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm
    No difference in performance.
    [7055720520 7055886880.32 7056265930] gvn_uadd_overflow_trap.so
    [7055719554 7055843809.33 7056193289] main.so

  compilation :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm
    No difference in performance.
    [683589861 683767276.00 684098366] gvn_uadd_overflow_trap.so
    [683590024 683767998.02 684097885] main.so

  execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm
    No difference in performance.
    [46436883 46437135.10 46437823] gvn_uadd_overflow_trap.so
    [46436883 46437087.67 46437785] main.so

  compilation :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm
    No difference in performance.
    [126522461 126565812.58 126647044] gvn_uadd_overflow_trap.so
    [126522176 126565757.75 126647522] main.so

  execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm
    No difference in performance.
    [653010531 653010533.03 653010544] gvn_uadd_overflow_trap.so
    [653010531 653010533.18 653010537] main.so
  ```

* cranelift-codegen-meta: Rename `side_effects_okay_for_gvn` to `side_effects_idempotent`

* cranelift-filetests: Ensure there is a trailing newline for blessed Wasm tests
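To make the special case from the first commit concrete, here is a minimal, illustrative sketch. This is not Wasmtime's actual bounds-check code, and the function name `needs_general_bounds_check` is hypothetical; only the condition mirrors the one described above.

```rust
/// Hypothetical helper, for illustration only: with a dynamic memory,
/// `min_size` bytes are always guaranteed to be present, so an access whose
/// end is statically known to stay below `min_size` can skip parts of the
/// check. With `min_size = 0` that fast path is unreachable, so every access
/// takes the fully general path, which computes `offset + access_size` with
/// `uadd_overflow_trap` and compares it against the memory's current length.
fn needs_general_bounds_check(offset: u64, access_size: u64, min_size: u64) -> bool {
    match offset.checked_add(access_size) {
        // The special case the test was accidentally hitting.
        Some(end) if end < min_size => false,
        // A fully general dynamic bounds check is required.
        _ => true,
    }
}

fn main() {
    // With `min_size = 0`, every access needs the general check.
    assert!(needs_general_bounds_check(16, 4, 0));
    // With a nonzero `min_size`, small constant accesses can skip it.
    assert!(!needs_general_bounds_check(16, 4, 64));
}
```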
//! A simple GVN pass.

use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::ir::{Function, Inst, InstructionData, Opcode, Type};
use crate::scoped_hash_map::ScopedHashMap;
use crate::timing;
use alloc::vec::Vec;
use core::cell::{Ref, RefCell};
use core::hash::{Hash, Hasher};

/// Test whether the given opcode is unsafe to even consider for GVN.
fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool {
    opcode.is_call()
        || opcode.is_branch()
        || opcode.is_terminator()
        || opcode.is_return()
        || opcode.can_store()
        // Trapping or otherwise side-effecting instructions are still fair
        // game for GVN if their side effects are idempotent: executing the
        // instruction twice on the same inputs is indistinguishable from
        // executing it once (e.g. `uadd_overflow_trap`).
        || (opcode.can_trap() && !opcode.side_effects_idempotent())
        || (opcode.other_side_effects() && !opcode.side_effects_idempotent())
}

/// Test that, if the specified instruction is a load, it doesn't have the `readonly` memflag.
fn is_load_and_not_readonly(inst_data: &InstructionData) -> bool {
    match *inst_data {
        InstructionData::Load { flags, .. } => !flags.readonly(),
        _ => inst_data.opcode().can_load(),
    }
}

/// Wrapper around `InstructionData` which implements `Eq` and `Hash`
#[derive(Clone)]
struct HashKey<'a, 'f: 'a> {
    inst: InstructionData,
    ty: Type,
    pos: &'a RefCell<FuncCursor<'f>>,
}
impl<'a, 'f: 'a> Hash for HashKey<'a, 'f> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let pool = &self.pos.borrow().func.dfg.value_lists;
        self.inst.hash(state, pool, |value| value);
        self.ty.hash(state);
    }
}
impl<'a, 'f: 'a> PartialEq for HashKey<'a, 'f> {
    fn eq(&self, other: &Self) -> bool {
        let pool = &self.pos.borrow().func.dfg.value_lists;
        self.inst.eq(&other.inst, pool, |value| value) && self.ty == other.ty
    }
}
impl<'a, 'f: 'a> Eq for HashKey<'a, 'f> {}

/// Perform simple GVN on `func`.
///
pub fn do_simple_gvn(func: &mut Function, domtree: &mut DominatorTree) {
    let _tt = timing::gvn();
    debug_assert!(domtree.is_valid());

    // Visit blocks in a reverse post-order.
    //
    // The RefCell here is a bit ugly since the HashKeys in the ScopedHashMap
    // need a reference to the function.
    let pos = RefCell::new(FuncCursor::new(func));

    let mut visible_values: ScopedHashMap<HashKey, Inst> = ScopedHashMap::new();
    let mut scope_stack: Vec<Inst> = Vec::new();

    for &block in domtree.cfg_postorder().iter().rev() {
        {
            // Pop any scopes that we just exited.
            let layout = &pos.borrow().func.layout;
            loop {
                if let Some(current) = scope_stack.last() {
                    if domtree.dominates(*current, block, layout) {
                        break;
                    }
                } else {
                    break;
                }
                scope_stack.pop();
                visible_values.decrement_depth();
            }

            // Push a scope for the current block.
            scope_stack.push(layout.first_inst(block).unwrap());
            visible_values.increment_depth();
        }

        pos.borrow_mut().goto_top(block);
        while let Some(inst) = {
            let mut pos = pos.borrow_mut();
            pos.next_inst()
        } {
            // Resolve aliases, particularly aliases we created earlier.
            pos.borrow_mut().func.dfg.resolve_aliases_in_arguments(inst);

            let func = Ref::map(pos.borrow(), |pos| &pos.func);

            let opcode = func.dfg.insts[inst].opcode();

            if opcode.is_branch() && !opcode.is_terminator() {
                scope_stack.push(func.layout.next_inst(inst).unwrap());
                visible_values.increment_depth();
            }

            if trivially_unsafe_for_gvn(opcode) {
                continue;
            }

            // These are split up to separate concerns.
            if is_load_and_not_readonly(&func.dfg.insts[inst]) {
                continue;
            }

            let ctrl_typevar = func.dfg.ctrl_typevar(inst);
            let key = HashKey {
                inst: func.dfg.insts[inst],
                ty: ctrl_typevar,
                pos: &pos,
            };
            use crate::scoped_hash_map::Entry::*;
            match visible_values.entry(key) {
                Occupied(entry) => {
                    #[allow(clippy::debug_assert_with_mut_call)]
                    {
                        // Clippy incorrectly believes `&func.layout` should not be used here:
                        // https://github.com/rust-lang/rust-clippy/issues/4737
                        debug_assert!(domtree.dominates(*entry.get(), inst, &func.layout));
                    }

                    // If the redundant instruction is representing the current
                    // scope, pick a new representative.
                    let old = scope_stack.last_mut().unwrap();
                    if *old == inst {
                        *old = func.layout.next_inst(inst).unwrap();
                    }
                    // Replace the redundant instruction and remove it.
                    drop(func);
                    let mut pos = pos.borrow_mut();
                    pos.func.dfg.replace_with_aliases(inst, *entry.get());
                    pos.remove_inst_and_step_back();
                }
                Vacant(entry) => {
                    entry.insert(inst);
                }
            }
        }
    }
}
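As a concrete illustration of why this pass can now deduplicate `uadd_overflow_trap`, here is a hypothetical in-crate unit test sketch; it is not part of this PR. It assumes the usual cranelift-codegen paths (`crate::flowgraph::ControlFlowGraph`, `crate::ir::TrapCode::HeapOutOfBounds`) and the generated `InstBuilder::uadd_overflow_trap` builder; exact names may differ between versions.

```rust
#[cfg(test)]
mod tests {
    use super::do_simple_gvn;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::dominator_tree::DominatorTree;
    use crate::flowgraph::ControlFlowGraph;
    use crate::ir::{types, AbiParam, Function, InstBuilder, TrapCode};

    #[test]
    fn gvn_dedups_uadd_overflow_trap() {
        // Build `fn(x: i64, y: i64) -> i64` containing two identical checked
        // additions, roughly the shape duplicated dynamic-memory bounds
        // checks take after lowering.
        let mut func = Function::new();
        func.signature.params.push(AbiParam::new(types::I64));
        func.signature.params.push(AbiParam::new(types::I64));
        func.signature.returns.push(AbiParam::new(types::I64));

        let block = func.dfg.make_block();
        func.dfg.append_block_param(block, types::I64);
        func.dfg.append_block_param(block, types::I64);
        {
            let mut pos = FuncCursor::new(&mut func);
            pos.insert_block(block);
            let x = pos.func.dfg.block_params(block)[0];
            let y = pos.func.dfg.block_params(block)[1];
            let a = pos.ins().uadd_overflow_trap(x, y, TrapCode::HeapOutOfBounds);
            let b = pos.ins().uadd_overflow_trap(x, y, TrapCode::HeapOutOfBounds);
            let sum = pos.ins().iadd(a, b);
            pos.ins().return_(&[sum]);
        }

        // GVN requires a valid dominator tree for the function's CFG.
        let cfg = ControlFlowGraph::with_function(&func);
        let mut domtree = DominatorTree::with_function(&func, &cfg);
        do_simple_gvn(&mut func, &mut domtree);

        // One of the two `uadd_overflow_trap`s should now be gone, leaving
        // the surviving trap, the `iadd`, and the `return`.
        assert_eq!(func.layout.block_insts(block).count(), 3);
    }
}
```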