Cranelift: GVN spectre guards and run redundant load elimination twice (#5517)
* Cranelift: Make spectre guards GVN-able While these instructions have a side effect that is otherwise invisible to the optimizer, the side effect in question is idempotent, so it can be de-duplicated by GVN. * Cranelift: Run redundant load replacement and GVN twice This allows us to actually replace redundant Wasm loads with dynamic memories. While this improves our hand-crafted test sequences, it doesn't seem to have any improvement on sightglass benchmarks run with dynamic memories, however it also isn't a hit to compilation times, so seems generally good to land anyways: ``` $ cargo run --release -- benchmark -e ~/scratch/once.so -e ~/scratch/twice.so -m insts-retired --processes 20 --iterations-per-process 3 --engine-flags="--static-memory-maximum-size 0" -- benchmarks/default.suite compilation :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [683595240 683768610.53 684097577] once.so [683597068 700115966.83 1664907164] twice.so instantiation :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [44107 60411.07 92785] once.so [44138 59552.32 92097] twice.so compilation :: instructions-retired :: benchmarks/bz2/benchmark.wasm No difference in performance. [17369916 17404839.78 17471458] once.so [17369935 17625713.87 30700150] twice.so compilation :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [126523640 126566170.80 126648265] once.so [126523076 127174580.30 163145149] twice.so instantiation :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [34569 35686.25 36513] once.so [34651 35749.97 36953] twice.so instantiation :: instructions-retired :: benchmarks/bz2/benchmark.wasm No difference in performance. [35146 36639.10 37707] once.so [34472 36580.82 38431] twice.so execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [7055720115 7055841324.82 7056180024] once.so [7055717681 7055877095.85 7056225217] twice.so execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [46436881 46437081.28 46437691] once.so [46436883 46437127.68 46437766] twice.so execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm No difference in performance. [653010530 653010533.27 653010539] once.so [653010531 653010532.95 653010538] twice.so ```
This commit is contained in:
@@ -73,6 +73,8 @@ pub(crate) struct InstructionContent {
|
|||||||
pub can_trap: bool,
|
pub can_trap: bool,
|
||||||
/// Does this instruction have other side effects besides can_* flags?
|
/// Does this instruction have other side effects besides can_* flags?
|
||||||
pub other_side_effects: bool,
|
pub other_side_effects: bool,
|
||||||
|
/// Despite having other side effects, is this instruction okay to GVN?
|
||||||
|
pub side_effects_okay_for_gvn: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl InstructionContent {
|
impl InstructionContent {
|
||||||
@@ -133,6 +135,7 @@ pub(crate) struct InstructionBuilder {
|
|||||||
can_store: bool,
|
can_store: bool,
|
||||||
can_trap: bool,
|
can_trap: bool,
|
||||||
other_side_effects: bool,
|
other_side_effects: bool,
|
||||||
|
side_effects_okay_for_gvn: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl InstructionBuilder {
|
impl InstructionBuilder {
|
||||||
@@ -152,6 +155,7 @@ impl InstructionBuilder {
|
|||||||
can_store: false,
|
can_store: false,
|
||||||
can_trap: false,
|
can_trap: false,
|
||||||
other_side_effects: false,
|
other_side_effects: false,
|
||||||
|
side_effects_okay_for_gvn: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -211,6 +215,11 @@ impl InstructionBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn side_effects_okay_for_gvn(mut self, val: bool) -> Self {
|
||||||
|
self.side_effects_okay_for_gvn = val;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
fn build(self) -> Instruction {
|
fn build(self) -> Instruction {
|
||||||
let operands_in = self.operands_in.unwrap_or_else(Vec::new);
|
let operands_in = self.operands_in.unwrap_or_else(Vec::new);
|
||||||
let operands_out = self.operands_out.unwrap_or_else(Vec::new);
|
let operands_out = self.operands_out.unwrap_or_else(Vec::new);
|
||||||
@@ -259,6 +268,7 @@ impl InstructionBuilder {
|
|||||||
can_store: self.can_store,
|
can_store: self.can_store,
|
||||||
can_trap: self.can_trap,
|
can_trap: self.can_trap,
|
||||||
other_side_effects: self.other_side_effects,
|
other_side_effects: self.other_side_effects,
|
||||||
|
side_effects_okay_for_gvn: self.side_effects_okay_for_gvn,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -512,6 +512,13 @@ fn gen_opcodes(all_inst: &AllInstructions, fmt: &mut Formatter) {
|
|||||||
"Does this instruction have other side effects besides can_* flags?",
|
"Does this instruction have other side effects besides can_* flags?",
|
||||||
fmt,
|
fmt,
|
||||||
);
|
);
|
||||||
|
gen_bool_accessor(
|
||||||
|
all_inst,
|
||||||
|
|inst| inst.side_effects_okay_for_gvn,
|
||||||
|
"side_effects_okay_for_gvn",
|
||||||
|
"Despite `self.other_side_effects() == true`, is this instruction okay to GVN?",
|
||||||
|
fmt,
|
||||||
|
)
|
||||||
});
|
});
|
||||||
fmt.line("}");
|
fmt.line("}");
|
||||||
fmt.empty_line();
|
fmt.empty_line();
|
||||||
|
|||||||
@@ -1389,7 +1389,10 @@ pub(crate) fn define(
|
|||||||
)
|
)
|
||||||
.operands_in(vec![c, x, y])
|
.operands_in(vec![c, x, y])
|
||||||
.operands_out(vec![a])
|
.operands_out(vec![a])
|
||||||
.other_side_effects(true),
|
.other_side_effects(true)
|
||||||
|
// We can de-duplicate spectre selects since the side effect is
|
||||||
|
// idempotent.
|
||||||
|
.side_effects_okay_for_gvn(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
let c = &Operand::new("c", Any).with_doc("Controlling value to test");
|
let c = &Operand::new("c", Any).with_doc("Controlling value to test");
|
||||||
|
|||||||
@@ -194,8 +194,10 @@ impl Context {
|
|||||||
if isa.flags().use_egraphs() {
|
if isa.flags().use_egraphs() {
|
||||||
self.egraph_pass()?;
|
self.egraph_pass()?;
|
||||||
} else if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() {
|
} else if opt_level != OptLevel::None && isa.flags().enable_alias_analysis() {
|
||||||
self.replace_redundant_loads()?;
|
for _ in 0..2 {
|
||||||
self.simple_gvn(isa)?;
|
self.replace_redundant_loads()?;
|
||||||
|
self.simple_gvn(isa)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -16,8 +16,8 @@ fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool {
|
|||||||
|| opcode.is_terminator()
|
|| opcode.is_terminator()
|
||||||
|| opcode.is_return()
|
|| opcode.is_return()
|
||||||
|| opcode.can_trap()
|
|| opcode.can_trap()
|
||||||
|| opcode.other_side_effects()
|
|
||||||
|| opcode.can_store()
|
|| opcode.can_store()
|
||||||
|
|| (opcode.other_side_effects() && !opcode.side_effects_okay_for_gvn())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test that, if the specified instruction is a load, it doesn't have the `readonly` memflag.
|
/// Test that, if the specified instruction is a load, it doesn't have the `readonly` memflag.
|
||||||
|
|||||||
@@ -65,15 +65,15 @@
|
|||||||
;; @0057 v10 = icmp ugt v4, v6
|
;; @0057 v10 = icmp ugt v4, v6
|
||||||
;; v19 -> v10
|
;; v19 -> v10
|
||||||
;; @0057 v11 = select_spectre_guard v10, v9, v8 ; v9 = 0
|
;; @0057 v11 = select_spectre_guard v10, v9, v8 ; v9 = 0
|
||||||
|
;; v20 -> v11
|
||||||
;; @0057 v12 = load.i32 little heap v11
|
;; @0057 v12 = load.i32 little heap v11
|
||||||
;; v2 -> v12
|
;; v2 -> v12
|
||||||
;; @005c v20 = select_spectre_guard v10, v9, v8 ; v9 = 0
|
;; v21 -> v12
|
||||||
;; @005c v21 = load.i32 little heap v20
|
|
||||||
;; v3 -> v21
|
;; v3 -> v21
|
||||||
;; @005f jump block1
|
;; @005f jump block1
|
||||||
;;
|
;;
|
||||||
;; block1:
|
;; block1:
|
||||||
;; @005f return v12, v21
|
;; @005f return v12, v12
|
||||||
;; }
|
;; }
|
||||||
;;
|
;;
|
||||||
;; function u0:1(i32, i64 vmctx) -> i32, i32 fast {
|
;; function u0:1(i32, i64 vmctx) -> i32, i32 fast {
|
||||||
@@ -103,13 +103,13 @@
|
|||||||
;; @0064 v11 = icmp ugt v4, v6
|
;; @0064 v11 = icmp ugt v4, v6
|
||||||
;; v21 -> v11
|
;; v21 -> v11
|
||||||
;; @0064 v12 = select_spectre_guard v11, v10, v9 ; v10 = 0
|
;; @0064 v12 = select_spectre_guard v11, v10, v9 ; v10 = 0
|
||||||
|
;; v22 -> v12
|
||||||
;; @0064 v13 = load.i32 little heap v12
|
;; @0064 v13 = load.i32 little heap v12
|
||||||
;; v2 -> v13
|
;; v2 -> v13
|
||||||
;; @006a v22 = select_spectre_guard v11, v10, v9 ; v10 = 0
|
;; v23 -> v13
|
||||||
;; @006a v23 = load.i32 little heap v22
|
|
||||||
;; v3 -> v23
|
;; v3 -> v23
|
||||||
;; @006e jump block1
|
;; @006e jump block1
|
||||||
;;
|
;;
|
||||||
;; block1:
|
;; block1:
|
||||||
;; @006e return v13, v23
|
;; @006e return v13, v13
|
||||||
;; }
|
;; }
|
||||||
Reference in New Issue
Block a user