Cranelift: GVN spectre guards and run redundant load elimination twice (#5517)
* Cranelift: Make spectre guards GVN-able While these instructions have a side effect that is otherwise invisible to the optimizer, the side effect in question is idempotent, so it can be de-duplicated by GVN. * Cranelift: Run redundant load replacement and GVN twice This allows us to actually replace redundant Wasm loads with dynamic memories. While this improves our hand-crafted test sequences, it doesn't seem to have any improvement on sightglass benchmarks run with dynamic memories, however it also isn't a hit to compilation times, so seems generally good to land anyways: ``` $ cargo run --release -- benchmark -e ~/scratch/once.so -e ~/scratch/twice.so -m insts-retired --processes 20 --iterations-per-process 3 --engine-flags="--static-memory-maximum-size 0" -- benchmarks/default.suite compilation :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [683595240 683768610.53 684097577] once.so [683597068 700115966.83 1664907164] twice.so instantiation :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [44107 60411.07 92785] once.so [44138 59552.32 92097] twice.so compilation :: instructions-retired :: benchmarks/bz2/benchmark.wasm No difference in performance. [17369916 17404839.78 17471458] once.so [17369935 17625713.87 30700150] twice.so compilation :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [126523640 126566170.80 126648265] once.so [126523076 127174580.30 163145149] twice.so instantiation :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [34569 35686.25 36513] once.so [34651 35749.97 36953] twice.so instantiation :: instructions-retired :: benchmarks/bz2/benchmark.wasm No difference in performance. [35146 36639.10 37707] once.so [34472 36580.82 38431] twice.so execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [7055720115 7055841324.82 7056180024] once.so [7055717681 7055877095.85 7056225217] twice.so execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [46436881 46437081.28 46437691] once.so [46436883 46437127.68 46437766] twice.so execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm No difference in performance. [653010530 653010533.27 653010539] once.so [653010531 653010532.95 653010538] twice.so ```
This commit is contained in:
@@ -73,6 +73,8 @@ pub(crate) struct InstructionContent {
|
||||
pub can_trap: bool,
|
||||
/// Does this instruction have other side effects besides can_* flags?
|
||||
pub other_side_effects: bool,
|
||||
/// Despite having other side effects, is this instruction okay to GVN?
|
||||
pub side_effects_okay_for_gvn: bool,
|
||||
}
|
||||
|
||||
impl InstructionContent {
|
||||
@@ -133,6 +135,7 @@ pub(crate) struct InstructionBuilder {
|
||||
can_store: bool,
|
||||
can_trap: bool,
|
||||
other_side_effects: bool,
|
||||
side_effects_okay_for_gvn: bool,
|
||||
}
|
||||
|
||||
impl InstructionBuilder {
|
||||
@@ -152,6 +155,7 @@ impl InstructionBuilder {
|
||||
can_store: false,
|
||||
can_trap: false,
|
||||
other_side_effects: false,
|
||||
side_effects_okay_for_gvn: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,6 +215,11 @@ impl InstructionBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn side_effects_okay_for_gvn(mut self, val: bool) -> Self {
|
||||
self.side_effects_okay_for_gvn = val;
|
||||
self
|
||||
}
|
||||
|
||||
fn build(self) -> Instruction {
|
||||
let operands_in = self.operands_in.unwrap_or_else(Vec::new);
|
||||
let operands_out = self.operands_out.unwrap_or_else(Vec::new);
|
||||
@@ -259,6 +268,7 @@ impl InstructionBuilder {
|
||||
can_store: self.can_store,
|
||||
can_trap: self.can_trap,
|
||||
other_side_effects: self.other_side_effects,
|
||||
side_effects_okay_for_gvn: self.side_effects_okay_for_gvn,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -512,6 +512,13 @@ fn gen_opcodes(all_inst: &AllInstructions, fmt: &mut Formatter) {
|
||||
"Does this instruction have other side effects besides can_* flags?",
|
||||
fmt,
|
||||
);
|
||||
gen_bool_accessor(
|
||||
all_inst,
|
||||
|inst| inst.side_effects_okay_for_gvn,
|
||||
"side_effects_okay_for_gvn",
|
||||
"Despite `self.other_side_effects() == true`, is this instruction okay to GVN?",
|
||||
fmt,
|
||||
)
|
||||
});
|
||||
fmt.line("}");
|
||||
fmt.empty_line();
|
||||
|
||||
@@ -1389,7 +1389,10 @@ pub(crate) fn define(
|
||||
)
|
||||
.operands_in(vec![c, x, y])
|
||||
.operands_out(vec![a])
|
||||
.other_side_effects(true),
|
||||
.other_side_effects(true)
|
||||
// We can de-duplicate spectre selects since the side effect is
|
||||
// idempotent.
|
||||
.side_effects_okay_for_gvn(true),
|
||||
);
|
||||
|
||||
let c = &Operand::new("c", Any).with_doc("Controlling value to test");
|
||||
|
||||
@@ -194,8 +194,10 @@ impl Context {
|
||||
if isa.flags().use_egraphs() {
|
||||
self.egraph_pass()?;
|
||||
} else if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() {
|
||||
self.replace_redundant_loads()?;
|
||||
self.simple_gvn(isa)?;
|
||||
for _ in 0..2 {
|
||||
self.replace_redundant_loads()?;
|
||||
self.simple_gvn(isa)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -16,8 +16,8 @@ fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool {
|
||||
|| opcode.is_terminator()
|
||||
|| opcode.is_return()
|
||||
|| opcode.can_trap()
|
||||
|| opcode.other_side_effects()
|
||||
|| opcode.can_store()
|
||||
|| (opcode.other_side_effects() && !opcode.side_effects_okay_for_gvn())
|
||||
}
|
||||
|
||||
/// Test that, if the specified instruction is a load, it doesn't have the `readonly` memflag.
|
||||
|
||||
@@ -65,15 +65,15 @@
|
||||
;; @0057 v10 = icmp ugt v4, v6
|
||||
;; v19 -> v10
|
||||
;; @0057 v11 = select_spectre_guard v10, v9, v8 ; v9 = 0
|
||||
;; v20 -> v11
|
||||
;; @0057 v12 = load.i32 little heap v11
|
||||
;; v2 -> v12
|
||||
;; @005c v20 = select_spectre_guard v10, v9, v8 ; v9 = 0
|
||||
;; @005c v21 = load.i32 little heap v20
|
||||
;; v21 -> v12
|
||||
;; v3 -> v21
|
||||
;; @005f jump block1
|
||||
;;
|
||||
;; block1:
|
||||
;; @005f return v12, v21
|
||||
;; @005f return v12, v12
|
||||
;; }
|
||||
;;
|
||||
;; function u0:1(i32, i64 vmctx) -> i32, i32 fast {
|
||||
@@ -103,13 +103,13 @@
|
||||
;; @0064 v11 = icmp ugt v4, v6
|
||||
;; v21 -> v11
|
||||
;; @0064 v12 = select_spectre_guard v11, v10, v9 ; v10 = 0
|
||||
;; v22 -> v12
|
||||
;; @0064 v13 = load.i32 little heap v12
|
||||
;; v2 -> v13
|
||||
;; @006a v22 = select_spectre_guard v11, v10, v9 ; v10 = 0
|
||||
;; @006a v23 = load.i32 little heap v22
|
||||
;; v23 -> v13
|
||||
;; v3 -> v23
|
||||
;; @006e jump block1
|
||||
;;
|
||||
;; block1:
|
||||
;; @006e return v13, v23
|
||||
;; }
|
||||
;; @006e return v13, v13
|
||||
;; }
|
||||
Reference in New Issue
Block a user