* egraph support: rewrite to work in terms of CLIF data structures. This work rewrites the "egraph"-based optimization framework in Cranelift to operate on aegraphs (acyclic egraphs) represented in the CLIF itself rather than as a separate data structure to which and from which we translate the CLIF. The basic idea is to add a new kind of value, a "union", that is like an alias but refers to two other values rather than one. This allows us to represent an eclass of enodes (values) as a tree. The union node allows for a value to have *multiple representations*: either constituent value could be used, and (in well-formed CLIF produced by correct optimization rules) they must be equivalent. Like the old egraph infrastructure, we take advantage of acyclicity and eager rule application to do optimization in a single pass. Like before, we integrate GVN (during the optimization pass) and LICM (during elaboration). Unlike the old egraph infrastructure, everything stays in the DataFlowGraph. "Pure" enodes are represented as instructions that have values attached, but that are not placed into the function layout. When entering "egraph" form, we remove them from the layout while optimizing. When leaving "egraph" form, during elaboration, we can place an instruction back into the layout the first time we elaborate the enode; if we elaborate it more than once, we clone the instruction. The implementation performs two passes overall: - One, a forward pass in RPO (to see defs before uses), that (i) removes "pure" instructions from the layout and (ii) optimizes as it goes. As before, we eagerly optimize, so we form the entire union of optimized forms of a value before we see any uses of that value. This lets us rewrite uses to use the most "up-to-date" form of the value and canonicalize and optimize that form. 
The eager rewriting and acyclic representation make each other work (we could not eagerly rewrite if there were cycles; and acyclicity does not miss optimization opportunities only because the first time we introduce a value, we immediately produce its "best" form). This design choice is also what allows us to avoid the "parent pointers" and fixpoint loop of traditional egraphs. This forward optimization pass keeps a scoped hashmap to "intern" nodes (thus performing GVN), and also interleaves on a per-instruction level with alias analysis. The interleaving with alias analysis allows alias analysis to see the most optimized form of each address (so it can see equivalences), and allows the next value to see any equivalences (reuses of loads or stored values) that alias analysis uncovers. - Two, a forward pass in domtree preorder, that "elaborates" pure enodes back into the layout, possibly in multiple places if needed. This tracks the loop nest and hoists nodes as needed, performing LICM as it goes. Note that by doing this in forward order, we avoid the "fixpoint" that traditional LICM needs: we hoist a def before its uses, so when we place a node, we place it in the right place the first time rather than moving later. This PR replaces the old (a)egraph implementation. It removes both the cranelift-egraph crate and the logic in cranelift-codegen that uses it. On `spidermonkey.wasm` running a simple recursive Fibonacci microbenchmark, this work shows 5.5% compile-time reduction and 7.7% runtime improvement (speedup). Most of this implementation was done in (very productive) pair programming sessions with Jamey Sharp, thus: Co-authored-by: Jamey Sharp <jsharp@fastly.com> * Review feedback. * Review feedback. * Review feedback. * Bugfix: cprop rule: `(x + k1) - k2` becomes `x - (k2 - k1)`, not `x - (k1 - k2)`. Co-authored-by: Jamey Sharp <jsharp@fastly.com>
116 lines
3.4 KiB
TOML
116 lines
3.4 KiB
TOML
[package]
name = "cranelift-codegen"
version = "0.92.0"
authors = ["The Cranelift Project Developers"]
# Build script run by Cargo before compiling the crate.
build = "build.rs"
# NOTE(review): the comment on the `core` feature in [features] says a no_std
# build is no longer supported; confirm the "no-std" category is still accurate.
categories = ["no-std"]
documentation = "https://docs.rs/cranelift-codegen"
edition.workspace = true
keywords = ["compile", "compiler", "jit"]
license = "Apache-2.0 WITH LLVM-exception"
readme = "README.md"
repository = "https://github.com/bytecodealliance/wasmtime"
description = "Low-level code generator library"

[dependencies]
# Entries are kept in alphabetical order. Dependencies marked `optional = true`
# are only pulled in through the features declared in [features].
arrayvec = "0.7"
bincode = { version = "1.2.1", optional = true }
bumpalo = "3"
cranelift-bforest = { workspace = true }
cranelift-codegen-shared = { path = "./shared", version = "0.92.0" }
cranelift-entity = { workspace = true }
gimli = { workspace = true, features = ["write"], optional = true }
hashbrown = { workspace = true }
log = { workspace = true }
regalloc2 = { version = "0.5.0", features = ["checker"] }
serde = { version = "1.0.94", features = ["derive"], optional = true }
sha2 = { version = "0.10.2", optional = true }
smallvec = { workspace = true }
souper-ir = { version = "2.1.0", optional = true }
target-lexicon = { workspace = true }
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be
# accommodated in `tests`.

[dev-dependencies]
# Presumably used by the `x64-evex-encoding` bench target, which is declared
# with `harness = false` -- confirm against the bench source.
criterion = "0.3"

[build-dependencies]
# Crates used by the build script (`build.rs`). Note that `cranelift-isle` is
# pinned to an exact version (`=`), while `cranelift-codegen-meta` uses a
# regular (caret) version requirement.
cranelift-codegen-meta = { path = "meta", version = "0.92.0" }
cranelift-isle = { path = "../isle/isle", version = "=0.92.0" }

[features]
default = ["std", "unwind", "trace-log"]

# The "std" feature enables use of libstd. The "core" feature enables use
# of some minimal std-like replacement libraries. At least one of these two
# features needs to be enabled.
std = []

# The "core" feature used to enable a hashmap workaround, but is now
# deprecated (we (i) always use hashbrown, and (ii) don't support a
# no_std build anymore). The feature remains for backward
# compatibility as a no-op.
core = []

# This enables some additional functions useful for writing tests, but which
# can significantly increase the size of the library.
testing_hooks = []

# Enables detailed logging which can be somewhat expensive.
trace-log = []

# This enables unwind info generation functionality.
unwind = ["gimli"]

# ISA targets for which we should build.
# If no ISA targets are explicitly enabled, the ISA target for the host machine is enabled.
x86 = []
arm64 = []
s390x = []
riscv64 = []
# Stub feature that does nothing, for Cargo-features compatibility: the new
# backend is the default now.
experimental_x64 = []

# Option to enable all architectures.
all-arch = [
    "x86",
    "arm64",
    "s390x",
    "riscv64",
]

# For dependent crates that want to serialize some parts of cranelift
enable-serde = [
    "serde",
    "cranelift-entity/enable-serde",
    "regalloc2/enable-serde",
    "smallvec/serde",
]

# Enable the incremental compilation cache for hot-reload use cases.
incremental-cache = [
    "enable-serde",
    "bincode",
    "sha2",
]

# Enable support for the Souper harvester.
souper-harvest = ["souper-ir", "souper-ir/stringify"]

# Report any ISLE errors in pretty-printed style.
isle-errors = ["cranelift-isle/fancy-errors"]

# Put ISLE generated files in isle_generated_code/, for easier
# inspection, rather than inside of target/.
isle-in-source-tree = []

[badges]
# Maintenance status advertised in the crate's metadata.
maintenance = { status = "experimental" }

[[bench]]
name = "x64-evex-encoding"
# Opt out of the built-in libtest benchmark harness; the bench target must
# provide its own `main` entry point.
harness = false