* Cranelift: Add the `DataFlowGraph::display_value_inst` convenience method
* Cranelift: Add some `trace!` logs to some parts of legalization
* Cranelift: de-duplicate bounds checks in legalizations

When both (1) "dynamic" memories that need explicit bounds checks and (2) Spectre mitigations that perform bounds checks are enabled, reuse the same bounds checks between the two legalizations.

This reduces the overhead of explicit bounds checks and Spectre mitigations, relative to using virtual memory guard pages with Spectre mitigations, from ~1.9-2.1x overhead to ~1.6-1.8x overhead. That is about a 14-19% speedup when dynamic memories and Spectre mitigations are both enabled.

<details>

```
execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm
Δ = 3422455129.47 ± 120159.49 (confidence = 99%)
virtual-memory-guards.so is 2.09x to 2.09x faster than bounds-checks.so!
[6563931659 6564063496.07 6564301535] bounds-checks.so
[3141492675 3141608366.60 3141895249] virtual-memory-guards.so

execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm
Δ = 338716136.87 ± 1.38 (confidence = 99%)
virtual-memory-guards.so is 2.08x to 2.08x faster than bounds-checks.so!
[651961494 651961495.47 651961497] bounds-checks.so
[313245357 313245358.60 313245362] virtual-memory-guards.so

execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm
Δ = 22742944.07 ± 331.73 (confidence = 99%)
virtual-memory-guards.so is 1.87x to 1.87x faster than bounds-checks.so!
[48841295 48841567.33 48842139] bounds-checks.so
[26098439 26098623.27 26099479] virtual-memory-guards.so
```

</details>

<details>

```
execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm
Δ = 2465900207.27 ± 146476.61 (confidence = 99%)
virtual-memory-guards.so is 1.78x to 1.78x faster than de-duped-bounds-checks.so!
[5607275431 5607442989.13 5607838342] de-duped-bounds-checks.so
[3141445345 3141542781.87 3141711213] virtual-memory-guards.so

execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm
Δ = 234253620.20 ± 2.33 (confidence = 99%)
virtual-memory-guards.so is 1.75x to 1.75x faster than de-duped-bounds-checks.so!
[547498977 547498980.93 547498985] de-duped-bounds-checks.so
[313245357 313245360.73 313245363] virtual-memory-guards.so

execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm
Δ = 16605659.13 ± 315.78 (confidence = 99%)
virtual-memory-guards.so is 1.64x to 1.64x faster than de-duped-bounds-checks.so!
[42703971 42704284.40 42704787] de-duped-bounds-checks.so
[26098432 26098625.27 26099234] virtual-memory-guards.so
```

</details>

<details>

```
execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm
Δ = 104462517.13 ± 7.32 (confidence = 99%)
de-duped-bounds-checks.so is 1.19x to 1.19x faster than bounds-checks.so!
[651961493 651961500.80 651961532] bounds-checks.so
[547498981 547498983.67 547498989] de-duped-bounds-checks.so

execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm
Δ = 956556982.80 ± 103034.59 (confidence = 99%)
de-duped-bounds-checks.so is 1.17x to 1.17x faster than bounds-checks.so!
[6563930590 6564019842.40 6564243651] bounds-checks.so
[5607307146 5607462859.60 5607677763] de-duped-bounds-checks.so

execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm
Δ = 6137307.87 ± 247.75 (confidence = 99%)
de-duped-bounds-checks.so is 1.14x to 1.14x faster than bounds-checks.so!
[48841303 48841472.93 48842000] bounds-checks.so
[42703965 42704165.07 42704718] de-duped-bounds-checks.so
```

</details>

* Update test expectations
* Add a test for deduplicating bounds checks between dynamic memories and Spectre mitigations
* Define a struct for the Spectre comparison instead of using a tuple
* More trace logging for heap legalization
148 lines
4.9 KiB
Rust
148 lines
4.9 KiB
Rust
//! Legalization of global values.
|
|
//!
|
|
//! This module exports the `expand_global_value` function which transforms a `global_value`
|
|
//! instruction into code that depends on the kind of global value referenced.
|
|
|
|
use crate::cursor::{Cursor, FuncCursor};
|
|
use crate::ir::{self, InstBuilder};
|
|
use crate::isa::TargetIsa;
|
|
|
|
/// Expand a `global_value` instruction according to the definition of the global value.
|
|
pub fn expand_global_value(
|
|
inst: ir::Inst,
|
|
func: &mut ir::Function,
|
|
isa: &dyn TargetIsa,
|
|
global_value: ir::GlobalValue,
|
|
) {
|
|
crate::trace!(
|
|
"expanding global value: {:?}: {}",
|
|
inst,
|
|
func.dfg.display_inst(inst)
|
|
);
|
|
|
|
match func.global_values[global_value] {
|
|
ir::GlobalValueData::VMContext => vmctx_addr(inst, func),
|
|
ir::GlobalValueData::IAddImm {
|
|
base,
|
|
offset,
|
|
global_type,
|
|
} => iadd_imm_addr(inst, func, base, offset.into(), global_type),
|
|
ir::GlobalValueData::Load {
|
|
base,
|
|
offset,
|
|
global_type,
|
|
readonly,
|
|
} => load_addr(inst, func, base, offset, global_type, readonly, isa),
|
|
ir::GlobalValueData::Symbol { tls, .. } => symbol(inst, func, global_value, isa, tls),
|
|
ir::GlobalValueData::DynScaleTargetConst { vector_type } => {
|
|
const_vector_scale(inst, func, vector_type, isa)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn const_vector_scale(inst: ir::Inst, func: &mut ir::Function, ty: ir::Type, isa: &dyn TargetIsa) {
|
|
assert!(ty.bytes() <= 16);
|
|
|
|
// Use a minimum of 128-bits for the base type.
|
|
let base_bytes = std::cmp::max(ty.bytes(), 16);
|
|
let scale = (isa.dynamic_vector_bytes(ty) / base_bytes) as i64;
|
|
assert!(scale > 0);
|
|
let pos = FuncCursor::new(func).at_inst(inst);
|
|
pos.func.dfg.replace(inst).iconst(isa.pointer_type(), scale);
|
|
}
|
|
|
|
/// Expand a `global_value` instruction for a vmctx global.
|
|
fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function) {
|
|
// Get the value representing the `vmctx` argument.
|
|
let vmctx = func
|
|
.special_param(ir::ArgumentPurpose::VMContext)
|
|
.expect("Missing vmctx parameter");
|
|
|
|
// Replace the `global_value` instruction's value with an alias to the vmctx arg.
|
|
let result = func.dfg.first_result(inst);
|
|
func.dfg.clear_results(inst);
|
|
func.dfg.change_to_alias(result, vmctx);
|
|
func.layout.remove_inst(inst);
|
|
}
|
|
|
|
/// Expand a `global_value` instruction for an iadd_imm global.
|
|
fn iadd_imm_addr(
|
|
inst: ir::Inst,
|
|
func: &mut ir::Function,
|
|
base: ir::GlobalValue,
|
|
offset: i64,
|
|
global_type: ir::Type,
|
|
) {
|
|
let mut pos = FuncCursor::new(func).at_inst(inst);
|
|
|
|
// Get the value for the lhs. For tidiness, expand VMContext here so that we avoid
|
|
// `vmctx_addr` which creates an otherwise unneeded value alias.
|
|
let lhs = if let ir::GlobalValueData::VMContext = pos.func.global_values[base] {
|
|
pos.func
|
|
.special_param(ir::ArgumentPurpose::VMContext)
|
|
.expect("Missing vmctx parameter")
|
|
} else {
|
|
pos.ins().global_value(global_type, base)
|
|
};
|
|
|
|
// Simply replace the `global_value` instruction with an `iadd_imm`, reusing the result value.
|
|
pos.func.dfg.replace(inst).iadd_imm(lhs, offset);
|
|
}
|
|
|
|
/// Expand a `global_value` instruction for a load global.
|
|
fn load_addr(
|
|
inst: ir::Inst,
|
|
func: &mut ir::Function,
|
|
base: ir::GlobalValue,
|
|
offset: ir::immediates::Offset32,
|
|
global_type: ir::Type,
|
|
readonly: bool,
|
|
isa: &dyn TargetIsa,
|
|
) {
|
|
// We need to load a pointer from the `base` global value, so insert a new `global_value`
|
|
// instruction. This depends on the iterative legalization loop. Note that the IR verifier
|
|
// detects any cycles in the `load` globals.
|
|
let ptr_ty = isa.pointer_type();
|
|
let mut pos = FuncCursor::new(func).at_inst(inst);
|
|
pos.use_srcloc(inst);
|
|
|
|
// Get the value for the base. For tidiness, expand VMContext here so that we avoid
|
|
// `vmctx_addr` which creates an otherwise unneeded value alias.
|
|
let base_addr = if let ir::GlobalValueData::VMContext = pos.func.global_values[base] {
|
|
pos.func
|
|
.special_param(ir::ArgumentPurpose::VMContext)
|
|
.expect("Missing vmctx parameter")
|
|
} else {
|
|
pos.ins().global_value(ptr_ty, base)
|
|
};
|
|
|
|
// Global-value loads are always notrap and aligned. They may be readonly.
|
|
let mut mflags = ir::MemFlags::trusted();
|
|
if readonly {
|
|
mflags.set_readonly();
|
|
}
|
|
|
|
// Perform the load.
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.load(global_type, mflags, base_addr, offset);
|
|
}
|
|
|
|
/// Expand a `global_value` instruction for a symbolic name global.
|
|
fn symbol(
|
|
inst: ir::Inst,
|
|
func: &mut ir::Function,
|
|
gv: ir::GlobalValue,
|
|
isa: &dyn TargetIsa,
|
|
tls: bool,
|
|
) {
|
|
let ptr_ty = isa.pointer_type();
|
|
|
|
if tls {
|
|
func.dfg.replace(inst).tls_value(ptr_ty, gv);
|
|
} else {
|
|
func.dfg.replace(inst).symbol_value(ptr_ty, gv);
|
|
}
|
|
}
|