Compare commits
39 Commits
376294e828
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
db8e06d3a8 | ||
|
|
3f379b9d69 | ||
|
|
c1f2b0f3a3 | ||
|
|
7a7dc20731 | ||
|
|
9f7d6bb3b4 | ||
|
|
6e6e408a05 | ||
|
|
b6cccb7ecb | ||
|
|
8746af2882 | ||
|
|
8fb8aa15b0 | ||
|
|
b6cc306d7a | ||
|
|
12e996f7de | ||
|
|
ef1e46d8ef | ||
|
|
a0404ec851 | ||
|
|
a0e2851620 | ||
|
|
d9bbbcfbe2 | ||
|
|
f5f984c81a | ||
|
|
74873feb96 | ||
|
|
75fdc9d3a4 | ||
|
|
84a1e58b97 | ||
|
|
8b724e1796 | ||
|
|
74f8e9a1fd | ||
|
|
d31dbaaa16 | ||
|
|
9d1dbadd04 | ||
|
|
c4a0d85b72 | ||
|
|
dcb95541a7 | ||
|
|
2c8b9a680f | ||
|
|
e2061d2e04 | ||
|
|
993074a974 | ||
|
|
706c44513e | ||
|
|
f0e9cde328 | ||
|
|
9c6d6dc9aa | ||
|
|
2bd03256b3 | ||
|
|
7354cfedde | ||
|
|
54f074e507 | ||
|
|
34a9ae7379 | ||
|
|
1e8da4f99b | ||
|
|
7bb83a3361 | ||
|
|
c3e513c4cb | ||
|
|
50b9cf8fe2 |
10
.github/workflows/rust.yml
vendored
10
.github/workflows/rust.yml
vendored
@@ -37,6 +37,16 @@ jobs:
|
||||
- name: Check with all features
|
||||
run: cargo check --all-features
|
||||
|
||||
# Make sure the code and its dependencies compile without std.
|
||||
no_std:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install thumbv6m-none-eabi target
|
||||
run: rustup target add thumbv6m-none-eabi
|
||||
- name: Check no_std build
|
||||
run: cargo check --target thumbv6m-none-eabi --no-default-features --features trace-log,checker,enable-serde
|
||||
|
||||
# Lint dependency graph for security advisories, duplicate versions, and
|
||||
# incompatible licences.
|
||||
cargo_deny:
|
||||
|
||||
9
.vscode/settings.json
vendored
Normal file
9
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"editor.formatOnSave": true,
|
||||
"[rust]": {
|
||||
"editor.defaultFormatter": "rust-lang.rust-analyzer"
|
||||
},
|
||||
"rust-analyzer.cargo.features": [
|
||||
"default"
|
||||
]
|
||||
}
|
||||
19
Cargo.toml
19
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "regalloc2"
|
||||
version = "0.5.1"
|
||||
version = "0.6.1"
|
||||
authors = [
|
||||
"Chris Fallin <chris@cfallin.org>",
|
||||
"Mozilla SpiderMonkey Developers",
|
||||
@@ -12,12 +12,16 @@ repository = "https://github.com/bytecodealliance/regalloc2"
|
||||
|
||||
[dependencies]
|
||||
log = { version = "0.4.8", default-features = false }
|
||||
smallvec = { version = "1.6.1", features = ["union"] }
|
||||
fxhash = "0.2.1"
|
||||
slice-group-by = "0.3.0"
|
||||
smallvec = { version = "1.6.1", features = ["union", "const_generics"] }
|
||||
rustc-hash = { version = "1.1.0", default-features = false }
|
||||
slice-group-by = { version = "0.3.0", default-features = false }
|
||||
hashbrown = "0.13.2"
|
||||
|
||||
# Optional serde support, enabled by feature below.
|
||||
serde = { version = "1.0.136", features = ["derive"], optional = true }
|
||||
serde = { version = "1.0.136", features = [
|
||||
"derive",
|
||||
"alloc",
|
||||
], default-features = false, optional = true }
|
||||
|
||||
# The below are only needed for fuzzing.
|
||||
libfuzzer-sys = { version = "0.4.2", optional = true }
|
||||
@@ -29,7 +33,10 @@ debug-assertions = true
|
||||
overflow-checks = true
|
||||
|
||||
[features]
|
||||
default = []
|
||||
default = ["std"]
|
||||
|
||||
# Enables std-specific features such as the Error trait for RegAllocError.
|
||||
std = []
|
||||
|
||||
# Enables generation of DefAlloc edits for the checker.
|
||||
checker = []
|
||||
|
||||
@@ -3,10 +3,7 @@
|
||||
This is a register allocator that started life as, and is about 50%
|
||||
still, a port of IonMonkey's backtracking register allocator to
|
||||
Rust. In many regards, it has been generalized, optimized, and
|
||||
improved since the initial port, and now supports both SSA and non-SSA
|
||||
use-cases. (However, non-SSA should be considered deprecated; we want to
|
||||
move to SSA-only in the future, to enable some performance improvements.
|
||||
See #4.)
|
||||
improved since the initial port.
|
||||
|
||||
In addition, it contains substantial amounts of testing infrastructure
|
||||
(fuzzing harnesses and checkers) that does not exist in the original
|
||||
|
||||
1
bench_res/9d1dbadd0453acc9698840ddac1fb4e7671a32dc.json
Normal file
1
bench_res/9d1dbadd0453acc9698840ddac1fb4e7671a32dc.json
Normal file
File diff suppressed because one or more lines are too long
257
bench_res/dcb95541a7a16564db4b15f9248ceab990a18135.txt
Normal file
257
bench_res/dcb95541a7a16564db4b15f9248ceab990a18135.txt
Normal file
@@ -0,0 +1,257 @@
|
||||
compilation :: cycles :: benchmarks/shootout-keccak/benchmark.wasm
|
||||
|
||||
Δ = 82716376.00 ± 10452013.97 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 3.22x to 3.86x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[109368640 115289268.00 121224220] tmp/wasmtime_main.so
|
||||
[26120680 32572892.00 38319220] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm
|
||||
|
||||
Δ = 35218172.00 ± 10139196.17 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 2.64x to 3.97x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[45573440 50487252.00 57536460] tmp/wasmtime_main.so
|
||||
[14076800 15269080.00 17370880] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm
|
||||
|
||||
Δ = 230472644.00 ± 14955335.34 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 3.13x to 3.42x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[324059620 331874540.00 343721760] tmp/wasmtime_main.so
|
||||
[92807900 101401896.00 111293420] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/bz2/benchmark.wasm
|
||||
|
||||
Δ = 148462916.00 ± 74415954.89 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.79x to 3.37x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[203553120 242451052.00 291165780] tmp/wasmtime_main.so
|
||||
[87747740 93988136.00 99525600] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm
|
||||
|
||||
Δ = 38242648.00 ± 16283101.77 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.63x to 2.56x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[68094780 73289288.00 78249120] tmp/wasmtime_main.so
|
||||
[25462000 35046640.00 47911900] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/blake3-simd/benchmark.wasm
|
||||
|
||||
Δ = 21865932.00 ± 4836633.92 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.76x to 2.19x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[42437960 44226636.00 46472140] tmp/wasmtime_main.so
|
||||
[19230460 22360704.00 25217580] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-base64/benchmark.wasm
|
||||
|
||||
Δ = 30766524.00 ± 8057535.50 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.71x to 2.22x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[58863940 62678716.00 64941420] tmp/wasmtime_main.so
|
||||
[28483860 31912192.00 39027860] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-ctype/benchmark.wasm
|
||||
|
||||
Δ = 26468612.00 ± 16769880.25 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.35x to 2.54x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[44742000 54515672.00 65715120] tmp/wasmtime_main.so
|
||||
[24041300 28047060.00 34958480] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-switch/benchmark.wasm
|
||||
|
||||
Δ = 44993524.00 ± 15374857.68 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.60x to 2.23x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[87974340 93954372.00 106466280] tmp/wasmtime_main.so
|
||||
[46232300 48960848.00 51231600] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm
|
||||
|
||||
Δ = 21867216.00 ± 5291064.57 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.62x to 2.01x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[45180760 48782624.00 52403220] tmp/wasmtime_main.so
|
||||
[23893540 26915408.00 29864760] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-fib2/benchmark.wasm
|
||||
|
||||
Δ = 21156820.00 ± 11632344.98 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.33x to 2.14x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[47489900 49865496.00 57817820] tmp/wasmtime_main.so
|
||||
[20262340 28708676.00 37136600] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/blind-sig/benchmark.wasm
|
||||
|
||||
Δ = 206300520.00 ± 49456972.18 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.53x to 1.87x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[468801420 501884140.00 533785860] tmp/wasmtime_main.so
|
||||
[280069560 295583620.00 313052880] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-random/benchmark.wasm
|
||||
|
||||
Δ = 17866328.00 ± 5058990.61 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.45x to 1.80x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[42136400 46350848.00 49072180] tmp/wasmtime_main.so
|
||||
[26323300 28484520.00 30908400] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/regex/benchmark.wasm
|
||||
|
||||
Δ = 306213000.00 ± 149004995.32 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.31x to 1.89x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[737215040 814849904.00 927360140] tmp/wasmtime_main.so
|
||||
[434576640 508636904.00 605740280] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-sieve/benchmark.wasm
|
||||
|
||||
Δ = 15761008.00 ± 9513156.34 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.23x to 1.94x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[40248860 42579220.00 45042100] tmp/wasmtime_main.so
|
||||
[21795160 26818212.00 34187840] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/meshoptimizer/benchmark.wasm
|
||||
|
||||
Δ = 25719108.00 ± 8595140.79 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.32x to 1.64x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[75542900 79202808.00 84619440] tmp/wasmtime_main.so
|
||||
[50271620 53483700.00 55800220] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-matrix/benchmark.wasm
|
||||
|
||||
Δ = 15708244.00 ± 10889969.92 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.14x to 1.78x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[46643700 49724296.00 53101840] tmp/wasmtime_main.so
|
||||
[26710660 34016052.00 39995200] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm
|
||||
|
||||
Δ = 2514174744.00 ± 1857722729.16 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.11x to 1.74x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[7653278120 8460320132.00 9199229920] tmp/wasmtime_main.so
|
||||
[4197742280 5946145388.00 6556552620] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm
|
||||
|
||||
Δ = 95998836.00 ± 94947313.64 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.00x to 1.71x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[333846840 364658356.00 438691720] tmp/wasmtime_main.so
|
||||
[216037300 268659520.00 322037740] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-gimli/benchmark.wasm
|
||||
|
||||
Δ = 1553848.00 ± 1106236.68 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.09x to 1.53x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[5962360 6564812.00 7277160] tmp/wasmtime_main.so
|
||||
[4538420 5010964.00 5912740] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm
|
||||
|
||||
Δ = 5108496.00 ± 3867184.58 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.07x to 1.53x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[20042160 22127584.00 23927120] tmp/wasmtime_main.so
|
||||
[14784460 17019088.00 19205440] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm
|
||||
|
||||
Δ = 4067360.00 ± 2084422.61 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.14x to 1.43x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[17229340 18357984.00 19519960] tmp/wasmtime_main.so
|
||||
[12904240 14290624.00 15326220] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/noop/benchmark.wasm
|
||||
|
||||
Δ = 2802768.00 ± 1567262.30 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.09x to 1.31x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[16538500 16982392.00 18070820] tmp/wasmtime_main.so
|
||||
[13400280 14179624.00 15124040] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/hex-simd/benchmark.wasm
|
||||
|
||||
Δ = 22611720.00 ± 15198082.61 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.06x to 1.31x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[135396000 143348280.00 154255820] tmp/wasmtime_main.so
|
||||
[112891420 120736560.00 126274080] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[49158240 61531312.00 102319960] tmp/wasmtime_main.so
|
||||
[28723540 33731256.00 36821820] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-memmove/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[16707500 20553252.00 27217200] tmp/wasmtime_main.so
|
||||
[13315020 16224728.00 21330760] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/intgemm-simd/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[324449640 352576952.00 413652920] tmp/wasmtime_main.so
|
||||
[273646660 281514868.00 285492580] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[8602740 9606232.00 11417000] tmp/wasmtime_main.so
|
||||
[7234340 8023400.00 8844940] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[20328440 21526748.00 22546480] tmp/wasmtime_main.so
|
||||
[15103220 18372012.00 22779260] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/blake3-scalar/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[93674620 133889868.00 183640420] tmp/wasmtime_main.so
|
||||
[106974240 122942228.00 152750560] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
746
bench_res/first_impl.txt
Normal file
746
bench_res/first_impl.txt
Normal file
@@ -0,0 +1,746 @@
|
||||
|
||||
execution :: cycles :: benchmarks/hex-simd/benchmark.wasm
|
||||
|
||||
Δ = 345792.24 ± 11833.61 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 7.36x to 7.81x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[47340 52501.32 190872] tmp/wasmtime_main.so
|
||||
[365580 398293.56 542808] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-base64/benchmark.wasm
|
||||
|
||||
Δ = 1970853027.76 ± 6979113.24 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 5.72x to 5.76x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[409600548 415637278.00 500391972] tmp/wasmtime_main.so
|
||||
[2366821945 2386490305.76 2513644956] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm
|
||||
|
||||
Δ = 46825190455.59 ± 127821876.94 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 5.61x to 5.63x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[9956663856 10133140692.71 10544512428] tmp/wasmtime_main.so
|
||||
[56352845593 56958331148.30 58657017995] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/meshoptimizer/benchmark.wasm
|
||||
|
||||
Δ = 20106010623.89 ± 139125939.44 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 5.00x to 5.06x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[4815750421 4986455753.06 5219471557] tmp/wasmtime_main.so
|
||||
[24698961323 25092466376.95 28372067424] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/blake3-scalar/benchmark.wasm
|
||||
|
||||
Δ = 1575653.08 ± 47716.10 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.89x to 5.14x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[368532 392537.50 565414] tmp/wasmtime_main.so
|
||||
[1832400 1968190.58 3389364] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm
|
||||
|
||||
Δ = 24043961.50 ± 125310.65 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.86x to 4.91x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[5992344 6189280.03 8772444] tmp/wasmtime_main.so
|
||||
[29597724 30233241.53 31265100] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/intgemm-simd/benchmark.wasm
|
||||
|
||||
Δ = 11790836237.58 ± 49258123.99 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.77x to 4.80x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[3095937432 3117382542.95 3205026611] tmp/wasmtime_main.so
|
||||
[14643634139 14908218780.53 15634756441] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/blake3-simd/benchmark.wasm
|
||||
|
||||
Δ = 1519308.88 ± 43278.47 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.60x to 4.81x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[387108 410404.35 723458] tmp/wasmtime_main.so
|
||||
[1825704 1929713.23 2913948] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/regex/benchmark.wasm
|
||||
|
||||
Δ = 881482054.62 ± 3682662.85 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.56x to 4.59x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[243603396 246887000.62 255784177] tmp/wasmtime_main.so
|
||||
[1114811388 1128369055.24 1169802000] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/bz2/benchmark.wasm
|
||||
|
||||
Δ = 357450293.22 ± 1549780.77 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.37x to 4.40x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[103914719 105636434.33 114783229] tmp/wasmtime_main.so
|
||||
[457542179 463086727.55 486926532] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm
|
||||
|
||||
Δ = 11882263.43 ± 71185.74 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.29x to 4.33x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[3426984 3590238.35 4005036] tmp/wasmtime_main.so
|
||||
[15081912 15472501.78 16709212] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-gimli/benchmark.wasm
|
||||
|
||||
Δ = 18851736.16 ± 108983.69 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.27x to 4.31x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[5576796 5731722.69 6796368] tmp/wasmtime_main.so
|
||||
[24021649 24583458.85 26077788] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm
|
||||
|
||||
Δ = 153466317.39 ± 987719.38 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 4.09x to 4.13x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[47873412 49380867.56 51816349] tmp/wasmtime_main.so
|
||||
[198883511 202847184.95 230991985] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-sieve/benchmark.wasm
|
||||
|
||||
Δ = 2917751368.96 ± 57306722.93 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 3.89x to 4.01x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[909005867 990025561.72 1153883089] tmp/wasmtime_main.so
|
||||
[3782853326 3907776930.68 5243343479] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-keccak/benchmark.wasm
|
||||
|
||||
Δ = 119732620.34 ± 7294642.74 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 3.70x to 4.05x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[147621024 161371775.17 242314921] tmp/wasmtime_main.so
|
||||
[34151940 41639154.83 84386880] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/blind-sig/benchmark.wasm
|
||||
|
||||
Δ = 761849961.52 ± 2634639.77 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 3.42x to 3.44x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[307939679 313264599.69 338735987] tmp/wasmtime_main.so
|
||||
[1066340736 1075114561.21 1133205407] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-ctype/benchmark.wasm
|
||||
|
||||
Δ = 2051594010.51 ± 10906506.10 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 3.27x to 3.29x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[889577173 899301369.62 941784589] tmp/wasmtime_main.so
|
||||
[2911972285 2950895380.13 3124423188] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm
|
||||
|
||||
Δ = 26578151841.89 ± 149545429.75 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 3.22x to 3.25x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[11730048733 11882966637.26 14017886712] tmp/wasmtime_main.so
|
||||
[37780663608 38461118479.15 40601000196] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/spidermonkey/benchmark.wasm
|
||||
|
||||
Δ = 2276822920.39 ± 18631445.19 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 3.13x to 3.16x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[1043458487 1062503164.66 1125259668] tmp/wasmtime_main.so
|
||||
[3268311912 3339326085.05 3827897531] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm
|
||||
|
||||
Δ = 6602600891.49 ± 27508477.21 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 2.92x to 2.94x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[3407309028 3425050525.40 3470297579] tmp/wasmtime_main.so
|
||||
[9912739643 10027651416.89 10301668992] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm
|
||||
|
||||
Δ = 288104513.37 ± 9735499.60 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 2.81x to 2.94x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[418829472 441878125.69 536600161] tmp/wasmtime_main.so
|
||||
[138407509 153773612.32 240393780] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-keccak/benchmark.wasm
|
||||
|
||||
Δ = 58212400.30 ± 1705214.89 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 2.81x to 2.92x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[29494764 31245282.73 47847564] tmp/wasmtime_main.so
|
||||
[83949804 89457683.03 119340505] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm
|
||||
|
||||
Δ = 3205249194.33 ± 18911258.58 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 2.69x to 2.71x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[1851027229 1882842939.40 1942785576] tmp/wasmtime_main.so
|
||||
[5022056881 5088092133.73 5357629800] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-matrix/benchmark.wasm
|
||||
|
||||
Δ = 1231745436.50 ± 5707664.58 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 2.65x to 2.67x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[735814657 741951718.69 767631096] tmp/wasmtime_main.so
|
||||
[1958603184 1973697155.19 2110201200] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm
|
||||
|
||||
Δ = 11897294.54 ± 122079.69 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 2.41x to 2.44x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[8045424 8373467.87 10410839] tmp/wasmtime_main.so
|
||||
[18899892 20270762.41 21215088] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-switch/benchmark.wasm
|
||||
|
||||
Δ = 204609415.39 ± 1598010.13 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 2.40x to 2.42x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[142621019 145191091.72 152865612] tmp/wasmtime_main.so
|
||||
[342664380 349800507.11 374884265] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-fib2/benchmark.wasm
|
||||
|
||||
Δ = 3236916155.06 ± 31613056.68 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.89x to 1.91x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[3467702447 3591461293.98 3742357248] tmp/wasmtime_main.so
|
||||
[6737314536 6828377449.04 7322136227] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/bz2/benchmark.wasm
|
||||
|
||||
Δ = 128770793.59 ± 7597401.75 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.83x to 1.94x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[253517005 274203142.89 353141172] tmp/wasmtime_main.so
|
||||
[128667349 145432349.30 217816236] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-switch/benchmark.wasm
|
||||
|
||||
Δ = 56438333.94 ± 7491692.69 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.58x to 1.76x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[128765772 140553794.72 224904593] tmp/wasmtime_main.so
|
||||
[69970176 84115460.78 148415723] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-sieve/benchmark.wasm
|
||||
|
||||
Δ = 23774967.71 ± 6707285.75 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.33x to 1.59x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[63403956 75258605.97 137083860] tmp/wasmtime_main.so
|
||||
[40798729 51483638.26 112740804] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm
|
||||
|
||||
Δ = 12569754.53 ± 7044529.04 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.20x to 1.71x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[30574403 40371559.79 114436297] tmp/wasmtime_main.so
|
||||
[20335572 27801805.26 96580117] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-fib2/benchmark.wasm
|
||||
|
||||
Δ = 23443643.25 ± 6736282.13 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.32x to 1.58x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[65146465 75878581.65 138438253] tmp/wasmtime_main.so
|
||||
[41823324 52434938.40 112445784] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm
|
||||
|
||||
Δ = 23131358.99 ± 7040047.00 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.31x to 1.58x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[64246896 75196786.96 137354796] tmp/wasmtime_main.so
|
||||
[41746068 52065427.97 115099595] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-random/benchmark.wasm
|
||||
|
||||
Δ = 22180731.35 ± 7399359.46 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.28x to 1.57x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[64062720 74368751.23 143287308] tmp/wasmtime_main.so
|
||||
[41346179 52188019.88 122821552] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-matrix/benchmark.wasm
|
||||
|
||||
Δ = 23690644.52 ± 6102793.29 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.52x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[70124616 81369306.83 143019432] tmp/wasmtime_main.so
|
||||
[47633327 57678662.31 117778860] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-ctype/benchmark.wasm
|
||||
|
||||
Δ = 22184906.81 ± 6133213.98 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.52x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[66025727 76273529.68 133334568] tmp/wasmtime_main.so
|
||||
[44490997 54088622.87 117528480] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-base64/benchmark.wasm
|
||||
|
||||
Δ = 24097448.13 ± 6012729.31 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.51x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[73622664 83718801.21 143594028] tmp/wasmtime_main.so
|
||||
[50266079 59621353.08 112276475] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm
|
||||
|
||||
Δ = 23856950.87 ± 6442900.19 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.29x to 1.50x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[72478980 83870223.68 143493840] tmp/wasmtime_main.so
|
||||
[48298068 60013272.81 120159900] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm
|
||||
|
||||
Δ = 25699694.19 ± 6260900.16 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.29x to 1.48x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[74991707 92036959.79 150451669] tmp/wasmtime_main.so
|
||||
[50091336 66337265.60 124640460] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/blake3-simd/benchmark.wasm
|
||||
|
||||
Δ = 11917241.79 ± 6120474.86 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.13x to 1.41x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[45112464 56382136.04 124882526] tmp/wasmtime_main.so
|
||||
[35716282 44464894.25 91276271] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/meshoptimizer/benchmark.wasm
|
||||
|
||||
Δ = 18268175.66 ± 12995516.57 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.06x to 1.35x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[90282564 106583377.46 172790496] tmp/wasmtime_main.so
|
||||
[68407524 88315201.80 509777639] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/blake3-simd/benchmark.wasm
|
||||
|
||||
Δ = 27081.34 ± 12431.92 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.11x to 1.29x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[130644 165156.84 363528] tmp/wasmtime_main.so
|
||||
[104508 138075.50 262080] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/regex/benchmark.wasm
|
||||
|
||||
Δ = 168480153.80 ± 16692141.15 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.17x to 1.21x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[814605301 881137381.33 1113401592] tmp/wasmtime_main.so
|
||||
[986134213 1049617535.13 1163637937] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/regex/benchmark.wasm
|
||||
|
||||
Δ = 75630.26 ± 29814.65 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.08x to 1.19x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[442296 542271.59 863567] tmp/wasmtime_main.so
|
||||
[465264 617901.85 877788] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/intgemm-simd/benchmark.wasm
|
||||
|
||||
Δ = 66461228.45 ± 8191971.57 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.12x to 1.15x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[447294061 482654101.73 581954652] tmp/wasmtime_main.so
|
||||
[515086921 549115330.18 605796372] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm
|
||||
|
||||
Δ = 17534.86 ± 16060.09 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.01x to 1.25x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[107316 153205.19 385056] tmp/wasmtime_main.so
|
||||
[91044 135670.33 278172] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm
|
||||
|
||||
Δ = 17051.40 ± 14734.77 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.02x to 1.23x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[107388 153577.80 470376] tmp/wasmtime_main.so
|
||||
[89028 136526.40 303156] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm
|
||||
|
||||
Δ = 943126119.95 ± 164284193.32 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.08x to 1.12x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[9021892788 9599833086.61 11546087076] tmp/wasmtime_main.so
|
||||
[9978139369 10542959206.56 11402534341] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-sieve/benchmark.wasm
|
||||
|
||||
Δ = 15852.97 ± 11475.80 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.03x to 1.16x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[147888 182349.73 321804] tmp/wasmtime_main.so
|
||||
[134604 166496.76 312156] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm
|
||||
|
||||
Δ = 17296.92 ± 15752.90 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.01x to 1.18x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[164268 205161.84 472392] tmp/wasmtime_main.so
|
||||
[144648 187864.92 385272] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/blake3-scalar/benchmark.wasm
|
||||
|
||||
Δ = 19482138.65 ± 6431347.52 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.06x to 1.12x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[199113301 217289885.09 288788372] tmp/wasmtime_main.so
|
||||
[220745124 236772023.74 306132408] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-ctype/benchmark.wasm
|
||||
|
||||
Δ = 14145.49 ± 10664.87 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.02x to 1.14x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[148536 187047.01 297001] tmp/wasmtime_main.so
|
||||
[140796 172901.52 304200] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-fib2/benchmark.wasm
|
||||
|
||||
Δ = 13499.29 ± 13201.25 (confidence = 99%)
|
||||
|
||||
wasmtime/target/release/libwasmtime_bench_api.so is 1.00x to 1.16x faster than tmp/wasmtime_main.so!
|
||||
|
||||
[152316 184395.97 362197] tmp/wasmtime_main.so
|
||||
[131760 170896.68 335880] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/hex-simd/benchmark.wasm
|
||||
|
||||
Δ = 14701654.36 ± 7722355.67 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.03x to 1.11x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[187211376 205180475.08 282106044] tmp/wasmtime_main.so
|
||||
[198510264 219882129.44 284877252] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-memmove/benchmark.wasm
|
||||
|
||||
Δ = 2750310.45 ± 142406.89 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.07x to 1.07x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[38182931 39016919.77 40971169] tmp/wasmtime_main.so
|
||||
[41085541 41767230.22 42774553] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-random/benchmark.wasm
|
||||
|
||||
Δ = 36038424.04 ± 857900.63 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.05x to 1.05x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[669296593 673652082.43 682587649] tmp/wasmtime_main.so
|
||||
[705019320 709690506.47 716079853] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm
|
||||
|
||||
Δ = 16340886.19 ± 7483652.81 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.02x to 1.06x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[365185836 401650579.46 470100098] tmp/wasmtime_main.so
|
||||
[392953752 417991465.65 484354441] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/blind-sig/benchmark.wasm
|
||||
|
||||
Δ = 20118786.07 ± 13423183.44 (confidence = 99%)
|
||||
|
||||
tmp/wasmtime_main.so is 1.01x to 1.06x faster than wasmtime/target/release/libwasmtime_bench_api.so!
|
||||
|
||||
[515765845 573135907.05 772132716] tmp/wasmtime_main.so
|
||||
[550619425 593254693.12 704204705] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[504 2611.08 183780] tmp/wasmtime_main.so
|
||||
[540 751.68 1296] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-gimli/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[7092684 15453006.13 84934656] tmp/wasmtime_main.so
|
||||
[5726555 10028783.61 45153864] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-switch/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[149832 238550.40 6512688] tmp/wasmtime_main.so
|
||||
[134496 170645.40 450072] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[24658307 35091571.34 110428560] tmp/wasmtime_main.so
|
||||
[19204344 28347037.36 97184879] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/bz2/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[188748 273112.54 5585866] tmp/wasmtime_main.so
|
||||
[186048 224155.80 456408] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-memmove/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[24383809 34604974.24 110344320] tmp/wasmtime_main.so
|
||||
[19233251 28705933.78 97765704] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[24109811 35190977.32 111999528] tmp/wasmtime_main.so
|
||||
[19003032 29830501.31 108302112] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[25825104 36284578.84 98705267] tmp/wasmtime_main.so
|
||||
[20983716 31298895.22 81193032] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[10303488 19091732.71 95030243] tmp/wasmtime_main.so
|
||||
[9972324 17266338.21 86076144] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/noop/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[94788 124173.74 283140] tmp/wasmtime_main.so
|
||||
[85608 113814.01 316476] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[126900 173581.19 297072] tmp/wasmtime_main.so
|
||||
[105984 159178.68 334008] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-matrix/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[151956 181208.16 367992] tmp/wasmtime_main.so
|
||||
[142164 167950.09 523081] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-memmove/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[107568 151283.52 417996] tmp/wasmtime_main.so
|
||||
[96552 140387.04 551844] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-keccak/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[104436 161983.80 325260] tmp/wasmtime_main.so
|
||||
[90216 150572.50 284940] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[133848 161197.20 268956] tmp/wasmtime_main.so
|
||||
[122652 152241.84 404784] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[130500 156812.06 297000] tmp/wasmtime_main.so
|
||||
[120384 148217.39 255168] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[145584 173084.40 341136] tmp/wasmtime_main.so
|
||||
[135252 165701.52 326484] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/blake3-scalar/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[184392 212838.50 532908] tmp/wasmtime_main.so
|
||||
[165528 203829.85 354708] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/hex-simd/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[209052 273107.88 423720] tmp/wasmtime_main.so
|
||||
[187704 261877.34 427392] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-random/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[145620 177143.04 297648] tmp/wasmtime_main.so
|
||||
[135648 169880.05 291132] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/blind-sig/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[192348 226756.79 469619] tmp/wasmtime_main.so
|
||||
[182232 217715.76 380268] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-base64/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[149328 174356.64 295560] tmp/wasmtime_main.so
|
||||
[138492 167680.81 302436] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/intgemm-simd/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[178056 207135.36 320040] tmp/wasmtime_main.so
|
||||
[167760 200227.68 337680] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/noop/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[432 615.96 1008] tmp/wasmtime_main.so
|
||||
[396 637.20 1116] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/meshoptimizer/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[179676 212660.64 351072] tmp/wasmtime_main.so
|
||||
[167292 219323.16 435815] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[140976 172092.24 285480] tmp/wasmtime_main.so
|
||||
[135180 167450.04 334512] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/spidermonkey/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[579672 632428.92 826056] tmp/wasmtime_main.so
|
||||
[545976 645584.76 1196496] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
compilation :: cycles :: benchmarks/noop/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[17553708 26606839.96 100542240] tmp/wasmtime_main.so
|
||||
[18061812 26108957.05 84559104] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[237672 275609.15 440675] tmp/wasmtime_main.so
|
||||
[215172 279336.61 616680] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[130644 158666.76 299736] tmp/wasmtime_main.so
|
||||
[121464 157300.20 329328] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
instantiation :: cycles :: benchmarks/shootout-gimli/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[84456 123928.57 425556] tmp/wasmtime_main.so
|
||||
[86580 122936.41 249768] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
|
||||
execution :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm
|
||||
|
||||
No difference in performance.
|
||||
|
||||
[468 739.08 1476] tmp/wasmtime_main.so
|
||||
[468 736.56 2700] wasmtime/target/release/libwasmtime_bench_api.so
|
||||
@@ -11,6 +11,6 @@ fuzz_target!(|func: Func| {
|
||||
let _ = env_logger::try_init();
|
||||
log::trace!("func:\n{:?}", func);
|
||||
let env = regalloc2::fuzzing::func::machine_env();
|
||||
let _out =
|
||||
regalloc2::fuzzing::ion::run(&func, &env, false, false).expect("regalloc did not succeed");
|
||||
let _out = regalloc2::fuzzing::ion::run(&func, &env, false, false, true)
|
||||
.expect("regalloc did not succeed");
|
||||
});
|
||||
|
||||
@@ -40,8 +40,8 @@ fuzz_target!(|testcase: TestCase| {
|
||||
let _ = env_logger::try_init();
|
||||
log::trace!("func:\n{:?}", func);
|
||||
let env = regalloc2::fuzzing::func::machine_env();
|
||||
let out =
|
||||
regalloc2::fuzzing::ion::run(&func, &env, true, false).expect("regalloc did not succeed");
|
||||
let out = regalloc2::fuzzing::ion::run(&func, &env, true, false, true)
|
||||
.expect("regalloc did not succeed");
|
||||
|
||||
let mut checker = Checker::new(&func, &env);
|
||||
checker.prepare(&out);
|
||||
|
||||
@@ -37,6 +37,6 @@ impl Arbitrary<'_> for TestCase {
|
||||
}
|
||||
|
||||
fuzz_target!(|t: TestCase| {
|
||||
let cfginfo = CFGInfo::new(&t.f).expect("could not create CFG info");
|
||||
let cfginfo = CFGInfo::new(&t.f, true).expect("could not create CFG info");
|
||||
validate_ssa(&t.f, &cfginfo).expect("invalid SSA");
|
||||
});
|
||||
|
||||
34
src/cfg.rs
34
src/cfg.rs
@@ -6,6 +6,8 @@
|
||||
//! Lightweight CFG analyses.
|
||||
|
||||
use crate::{domtree, postorder, Block, Function, Inst, ProgPoint, RegAllocError};
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -31,7 +33,7 @@ pub struct CFGInfo {
|
||||
}
|
||||
|
||||
impl CFGInfo {
|
||||
pub fn new<F: Function>(f: &F) -> Result<CFGInfo, RegAllocError> {
|
||||
pub fn new<F: Function>(f: &F, needs_loop_depth: bool) -> Result<CFGInfo, RegAllocError> {
|
||||
let postorder = postorder::calculate(f.num_blocks(), f.entry_block(), |block| {
|
||||
f.block_succs(block)
|
||||
});
|
||||
@@ -96,22 +98,24 @@ impl CFGInfo {
|
||||
}
|
||||
|
||||
let mut approx_loop_depth = vec![];
|
||||
let mut backedge_stack: SmallVec<[usize; 4]> = smallvec![];
|
||||
let mut cur_depth = 0;
|
||||
for block in 0..f.num_blocks() {
|
||||
if backedge_in[block] > 0 {
|
||||
cur_depth += 1;
|
||||
backedge_stack.push(backedge_in[block]);
|
||||
}
|
||||
if needs_loop_depth {
|
||||
let mut backedge_stack: SmallVec<[usize; 4]> = smallvec![];
|
||||
let mut cur_depth = 0;
|
||||
for block in 0..f.num_blocks() {
|
||||
if backedge_in[block] > 0 {
|
||||
cur_depth += 1;
|
||||
backedge_stack.push(backedge_in[block]);
|
||||
}
|
||||
|
||||
approx_loop_depth.push(cur_depth);
|
||||
approx_loop_depth.push(cur_depth);
|
||||
|
||||
while backedge_stack.len() > 0 && backedge_out[block] > 0 {
|
||||
backedge_out[block] -= 1;
|
||||
*backedge_stack.last_mut().unwrap() -= 1;
|
||||
if *backedge_stack.last().unwrap() == 0 {
|
||||
cur_depth -= 1;
|
||||
backedge_stack.pop();
|
||||
while backedge_stack.len() > 0 && backedge_out[block] > 0 {
|
||||
backedge_out[block] -= 1;
|
||||
*backedge_stack.last_mut().unwrap() -= 1;
|
||||
if *backedge_stack.last().unwrap() == 0 {
|
||||
cur_depth -= 1;
|
||||
backedge_stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
120
src/checker.rs
120
src/checker.rs
@@ -96,14 +96,16 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::{
|
||||
Allocation, AllocationKind, Block, Edit, Function, Inst, InstOrEdit, InstPosition, MachineEnv,
|
||||
Operand, OperandConstraint, OperandKind, OperandPos, Output, PReg, PRegSet, VReg,
|
||||
Allocation, AllocationKind, Block, Edit, Function, FxHashMap, FxHashSet, Inst, InstOrEdit,
|
||||
InstPosition, MachineEnv, Operand, OperandConstraint, OperandKind, OperandPos, Output, PReg,
|
||||
PRegSet, VReg,
|
||||
};
|
||||
use fxhash::{FxHashMap, FxHashSet};
|
||||
use alloc::vec::Vec;
|
||||
use alloc::{format, vec};
|
||||
use core::default::Default;
|
||||
use core::hash::Hash;
|
||||
use core::result::Result;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::default::Default;
|
||||
use std::hash::Hash;
|
||||
use std::result::Result;
|
||||
|
||||
/// A set of errors detected by the regalloc checker.
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -230,7 +232,7 @@ impl CheckerValue {
|
||||
}
|
||||
|
||||
fn from_reg(reg: VReg) -> CheckerValue {
|
||||
CheckerValue::VRegs(std::iter::once(reg).collect())
|
||||
CheckerValue::VRegs(core::iter::once(reg).collect())
|
||||
}
|
||||
|
||||
fn remove_vreg(&mut self, reg: VReg) {
|
||||
@@ -269,10 +271,6 @@ fn visit_all_vregs<F: Function, V: FnMut(VReg)>(f: &F, mut v: V) {
|
||||
for op in f.inst_operands(inst) {
|
||||
v(op.vreg());
|
||||
}
|
||||
if let Some((src, dst)) = f.is_move(inst) {
|
||||
v(src.vreg());
|
||||
v(dst.vreg());
|
||||
}
|
||||
if f.is_branch(inst) {
|
||||
for succ_idx in 0..f.block_succs(block).len() {
|
||||
for &param in f.branch_blockparams(block, inst, succ_idx) {
|
||||
@@ -377,8 +375,8 @@ impl Default for CheckerState {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for CheckerValue {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Display for CheckerValue {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
match self {
|
||||
CheckerValue::Universe => {
|
||||
write!(f, "top")
|
||||
@@ -565,25 +563,6 @@ impl CheckerState {
|
||||
// according to the move semantics in the step
|
||||
// function below.
|
||||
}
|
||||
&CheckerInst::ProgramMove { inst, src, dst: _ } => {
|
||||
// Validate that the fixed-reg constraint, if any, on
|
||||
// `src` is satisfied.
|
||||
if let OperandConstraint::FixedReg(preg) = src.constraint() {
|
||||
let alloc = Allocation::reg(preg);
|
||||
let val = self.get_value(&alloc).unwrap_or(&default_val);
|
||||
trace!(
|
||||
"checker: checkinst {:?}: cheker value in {:?} is {:?}",
|
||||
checkinst,
|
||||
alloc,
|
||||
val
|
||||
);
|
||||
self.check_val(inst, src, alloc, val, &[alloc], checker)?;
|
||||
}
|
||||
// Note that we don't do anything with `dst`
|
||||
// here. That is implicitly checked whenever `dst` is
|
||||
// used; the `update()` step below adds the symbolic
|
||||
// vreg for `dst` into wherever `src` may be stored.
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -686,15 +665,6 @@ impl CheckerState {
|
||||
}
|
||||
}
|
||||
}
|
||||
&CheckerInst::ProgramMove { inst: _, src, dst } => {
|
||||
// Remove all earlier instances of `dst`: this vreg is
|
||||
// now stale (it is being overwritten).
|
||||
self.remove_vreg(dst.vreg());
|
||||
// Define `dst` wherever `src` occurs.
|
||||
for (_, value) in self.get_mappings_mut() {
|
||||
value.copy_vreg(src.vreg(), dst.vreg());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -786,23 +756,6 @@ pub(crate) enum CheckerInst {
|
||||
/// A safepoint, with the given Allocations specified as containing
|
||||
/// reftyped values. All other reftyped values become invalid.
|
||||
Safepoint { inst: Inst, allocs: Vec<Allocation> },
|
||||
|
||||
/// An op with one source operand, and one dest operand, that
|
||||
/// copies any symbolic values from the source to the dest, in
|
||||
/// addition to adding the symbolic value of the dest vreg to the
|
||||
/// set. This "program move" is distinguished from the above
|
||||
/// `Move` by being semantically relevant in the original
|
||||
/// (pre-regalloc) program.
|
||||
///
|
||||
/// We transform checker values as follows: for any vreg-set that
|
||||
/// contains `dst`'s vreg, we first delete that vreg (because it
|
||||
/// is being redefined). Then, for any vreg-set with `src`
|
||||
/// present, we add `dst`.
|
||||
ProgramMove {
|
||||
inst: Inst,
|
||||
src: Operand,
|
||||
dst: Operand,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -903,35 +856,10 @@ impl<'a, F: Function> Checker<'a, F> {
|
||||
self.bb_insts.get_mut(&block).unwrap().push(checkinst);
|
||||
}
|
||||
|
||||
// If this is a move, handle specially. Note that the
|
||||
// regalloc2-inserted moves are not semantically present in
|
||||
// the original program and so do not modify the sets of
|
||||
// symbolic values at all, but rather just move them around;
|
||||
// but "program moves" *are* present, and have the following
|
||||
// semantics: they define the destination vreg, but also
|
||||
// retain any symbolic values in the source.
|
||||
//
|
||||
// regalloc2 reifies all moves into edits in its unified
|
||||
// move/edit framework, so we don't get allocs for these moves
|
||||
// in the post-regalloc output, and the embedder is not
|
||||
// supposed to emit the moves. But we *do* want to check the
|
||||
// semantic implications, namely definition of new vregs. So
|
||||
// we emit `ProgramMove` ops that do just this.
|
||||
if let Some((src, dst)) = self.f.is_move(inst) {
|
||||
let src_op = Operand::any_use(src.vreg());
|
||||
let dst_op = Operand::any_def(dst.vreg());
|
||||
let checkinst = CheckerInst::ProgramMove {
|
||||
inst,
|
||||
src: src_op,
|
||||
dst: dst_op,
|
||||
};
|
||||
trace!("checker: adding inst {:?}", checkinst);
|
||||
self.bb_insts.get_mut(&block).unwrap().push(checkinst);
|
||||
}
|
||||
// Skip normal checks if this is a branch: the blockparams do
|
||||
// not exist in post-regalloc code, and the edge-moves have to
|
||||
// be inserted before the branch rather than after.
|
||||
else if !self.f.is_branch(inst) {
|
||||
if !self.f.is_branch(inst) {
|
||||
let operands: Vec<_> = self.f.inst_operands(inst).iter().cloned().collect();
|
||||
let allocs: Vec<_> = out.inst_allocs(inst).iter().cloned().collect();
|
||||
let clobbers: Vec<_> = self.f.inst_clobbers(inst).into_iter().collect();
|
||||
@@ -987,11 +915,21 @@ impl<'a, F: Function> Checker<'a, F> {
|
||||
let mut queue = Vec::new();
|
||||
let mut queue_set = FxHashSet::default();
|
||||
|
||||
queue.push(self.f.entry_block());
|
||||
queue_set.insert(self.f.entry_block());
|
||||
// Put every block in the queue to start with, to ensure
|
||||
// everything is visited even if the initial state remains
|
||||
// `Top` after preds update it.
|
||||
//
|
||||
// We add blocks in reverse order so that when we process
|
||||
// back-to-front below, we do our initial pass in input block
|
||||
// order, which is (usually) RPO order or at least a
|
||||
// reasonable visit order.
|
||||
for block in (0..self.f.num_blocks()).rev() {
|
||||
let block = Block::new(block);
|
||||
queue.push(block);
|
||||
queue_set.insert(block);
|
||||
}
|
||||
|
||||
while !queue.is_empty() {
|
||||
let block = queue.pop().unwrap();
|
||||
while let Some(block) = queue.pop() {
|
||||
queue_set.remove(&block);
|
||||
let mut state = self.bb_in.get(&block).cloned().unwrap();
|
||||
trace!("analyze: block {} has state {:?}", block.index(), state);
|
||||
@@ -1032,9 +970,8 @@ impl<'a, F: Function> Checker<'a, F> {
|
||||
new_state
|
||||
);
|
||||
self.bb_in.insert(succ, new_state);
|
||||
if !queue_set.contains(&succ) {
|
||||
if queue_set.insert(succ) {
|
||||
queue.push(succ);
|
||||
queue_set.insert(succ);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1119,9 +1056,6 @@ impl<'a, F: Function> Checker<'a, F> {
|
||||
}
|
||||
trace!(" safepoint: {}", slotargs.join(", "));
|
||||
}
|
||||
&CheckerInst::ProgramMove { inst, src, dst } => {
|
||||
trace!(" inst{}: prog_move {} -> {}", inst.index(), src, dst);
|
||||
}
|
||||
&CheckerInst::ParallelMove { .. } => {
|
||||
panic!("unexpected parallel_move in body (non-edge)")
|
||||
}
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
// TR-06-33870
|
||||
// https://www.cs.rice.edu/~keith/EMBED/dom.pdf
|
||||
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use crate::Block;
|
||||
|
||||
// Helper
|
||||
|
||||
@@ -8,6 +8,9 @@ use crate::{
|
||||
OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, VReg,
|
||||
};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use alloc::{format, vec};
|
||||
|
||||
use super::arbitrary::Result as ArbitraryResult;
|
||||
use super::arbitrary::{Arbitrary, Unstructured};
|
||||
|
||||
@@ -124,10 +127,6 @@ impl Function for Func {
|
||||
&self.debug_value_labels[..]
|
||||
}
|
||||
|
||||
fn is_move(&self, _: Inst) -> Option<(Operand, Operand)> {
|
||||
None
|
||||
}
|
||||
|
||||
fn inst_operands(&self, insn: Inst) -> &[Operand] {
|
||||
&self.insts[insn.index()].operands[..]
|
||||
}
|
||||
@@ -279,7 +278,7 @@ pub struct Options {
|
||||
pub reftypes: bool,
|
||||
}
|
||||
|
||||
impl std::default::Default for Options {
|
||||
impl core::default::Default for Options {
|
||||
fn default() -> Self {
|
||||
Options {
|
||||
reused_inputs: false,
|
||||
@@ -408,7 +407,7 @@ impl Func {
|
||||
}
|
||||
vregs_by_block.push(vregs.clone());
|
||||
vregs_by_block_to_be_defined.push(vec![]);
|
||||
let mut max_block_params = u.int_in_range(0..=std::cmp::min(3, vregs.len() / 3))?;
|
||||
let mut max_block_params = u.int_in_range(0..=core::cmp::min(3, vregs.len() / 3))?;
|
||||
for &vreg in &vregs {
|
||||
if block > 0 && opts.block_params && bool::arbitrary(u)? && max_block_params > 0 {
|
||||
block_params[block].push(vreg);
|
||||
@@ -595,8 +594,8 @@ impl Func {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Func {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Debug for Func {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(f, "{{\n")?;
|
||||
for vreg in self.reftype_vregs() {
|
||||
write!(f, " REF: {}\n", vreg)?;
|
||||
@@ -657,16 +656,18 @@ impl std::fmt::Debug for Func {
|
||||
}
|
||||
|
||||
pub fn machine_env() -> MachineEnv {
|
||||
fn regs(r: std::ops::Range<usize>) -> Vec<PReg> {
|
||||
fn regs(r: core::ops::Range<usize>) -> Vec<PReg> {
|
||||
r.map(|i| PReg::new(i, RegClass::Int)).collect()
|
||||
}
|
||||
let preferred_regs_by_class: [Vec<PReg>; 2] = [regs(0..24), vec![]];
|
||||
let non_preferred_regs_by_class: [Vec<PReg>; 2] = [regs(24..32), vec![]];
|
||||
let scratch_by_class: [Option<PReg>; 2] = [None, None];
|
||||
let fixed_stack_slots = regs(32..63);
|
||||
// Register 63 is reserved for use as a fixed non-allocatable register.
|
||||
MachineEnv {
|
||||
preferred_regs_by_class,
|
||||
non_preferred_regs_by_class,
|
||||
scratch_by_class,
|
||||
fixed_stack_slots,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,11 +50,11 @@ macro_rules! define_index {
|
||||
};
|
||||
}
|
||||
|
||||
pub trait ContainerIndex: Clone + Copy + std::fmt::Debug + PartialEq + Eq {}
|
||||
pub trait ContainerIndex: Clone + Copy + core::fmt::Debug + PartialEq + Eq {}
|
||||
|
||||
pub trait ContainerComparator {
|
||||
type Ix: ContainerIndex;
|
||||
fn compare(&self, a: Self::Ix, b: Self::Ix) -> std::cmp::Ordering;
|
||||
fn compare(&self, a: Self::Ix, b: Self::Ix) -> core::cmp::Ordering;
|
||||
}
|
||||
|
||||
define_index!(Inst);
|
||||
@@ -146,6 +146,9 @@ impl Iterator for InstRangeIter {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -5,8 +5,10 @@
|
||||
|
||||
//! Index sets: sets of integers that represent indices into a space.
|
||||
|
||||
use fxhash::FxHashMap;
|
||||
use std::cell::Cell;
|
||||
use alloc::vec::Vec;
|
||||
use core::cell::Cell;
|
||||
|
||||
use crate::FxHashMap;
|
||||
|
||||
const SMALL_ELEMS: usize = 12;
|
||||
|
||||
@@ -151,10 +153,10 @@ impl AdaptiveMap {
|
||||
|
||||
enum AdaptiveMapIter<'a> {
|
||||
Small(&'a [u32], &'a [u64]),
|
||||
Large(std::collections::hash_map::Iter<'a, u32, u64>),
|
||||
Large(hashbrown::hash_map::Iter<'a, u32, u64>),
|
||||
}
|
||||
|
||||
impl<'a> std::iter::Iterator for AdaptiveMapIter<'a> {
|
||||
impl<'a> core::iter::Iterator for AdaptiveMapIter<'a> {
|
||||
type Item = (u32, u64);
|
||||
|
||||
#[inline]
|
||||
@@ -292,7 +294,7 @@ impl Iterator for SetBitsIter {
|
||||
// Build an `Option<NonZeroU64>` so that on the nonzero path,
|
||||
// the compiler can optimize the trailing-zeroes operator
|
||||
// using that knowledge.
|
||||
std::num::NonZeroU64::new(self.0).map(|nz| {
|
||||
core::num::NonZeroU64::new(self.0).map(|nz| {
|
||||
let bitidx = nz.trailing_zeros();
|
||||
self.0 &= self.0 - 1; // clear highest set bit
|
||||
bitidx as usize
|
||||
@@ -300,8 +302,8 @@ impl Iterator for SetBitsIter {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for IndexSet {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Debug for IndexSet {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
let vals = self.iter().collect::<Vec<_>>();
|
||||
write!(f, "{:?}", vals)
|
||||
}
|
||||
|
||||
@@ -17,14 +17,16 @@ use crate::cfg::CFGInfo;
|
||||
use crate::index::ContainerComparator;
|
||||
use crate::indexset::IndexSet;
|
||||
use crate::{
|
||||
define_index, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, PReg, ProgPoint,
|
||||
RegClass, VReg,
|
||||
define_index, Allocation, Block, Edit, Function, FxHashSet, Inst, MachineEnv, Operand, PReg,
|
||||
ProgPoint, RegClass, VReg,
|
||||
};
|
||||
use fxhash::FxHashSet;
|
||||
use alloc::collections::BTreeMap;
|
||||
use alloc::string::String;
|
||||
use alloc::vec::Vec;
|
||||
use core::cmp::Ordering;
|
||||
use core::fmt::Debug;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use smallvec::SmallVec;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::fmt::Debug;
|
||||
|
||||
/// A range from `from` (inclusive) to `to` (exclusive).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
@@ -64,13 +66,13 @@ impl CodeRange {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::cmp::PartialOrd for CodeRange {
|
||||
impl core::cmp::PartialOrd for CodeRange {
|
||||
#[inline(always)]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
impl std::cmp::Ord for CodeRange {
|
||||
impl core::cmp::Ord for CodeRange {
|
||||
#[inline(always)]
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
if self.to <= other.from {
|
||||
@@ -278,7 +280,7 @@ const fn no_bloat_capacity<T>() -> usize {
|
||||
//
|
||||
// So if `size_of([T; N]) == size_of(pointer) + size_of(capacity)` then we
|
||||
// get the maximum inline capacity without bloat.
|
||||
std::mem::size_of::<usize>() * 2 / std::mem::size_of::<T>()
|
||||
core::mem::size_of::<usize>() * 2 / core::mem::size_of::<T>()
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -405,21 +407,6 @@ pub struct Env<'a, F: Function> {
|
||||
pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 2],
|
||||
pub preferred_victim_by_class: [PReg; 2],
|
||||
|
||||
// Program moves: these are moves in the provided program that we
|
||||
// handle with our internal machinery, in order to avoid the
|
||||
// overhead of ordinary operand processing. We expect the client
|
||||
// to not generate any code for instructions that return
|
||||
// `Some(..)` for `.is_move()`, and instead use the edits that we
|
||||
// provide to implement those moves (or some simplified version of
|
||||
// them) post-regalloc.
|
||||
//
|
||||
// (from-vreg, inst, from-alloc), sorted by (from-vreg, inst)
|
||||
pub prog_move_srcs: Vec<((VRegIndex, Inst), Allocation)>,
|
||||
// (to-vreg, inst, to-alloc), sorted by (to-vreg, inst)
|
||||
pub prog_move_dsts: Vec<((VRegIndex, Inst), Allocation)>,
|
||||
// (from-vreg, to-vreg) for bundle-merging.
|
||||
pub prog_move_merges: Vec<(LiveRangeIndex, LiveRangeIndex)>,
|
||||
|
||||
// When multiple fixed-register constraints are present on a
|
||||
// single VReg at a single program point (this can happen for,
|
||||
// e.g., call args that use the same value multiple times), we
|
||||
@@ -446,7 +433,7 @@ pub struct Env<'a, F: Function> {
|
||||
|
||||
// For debug output only: a list of textual annotations at every
|
||||
// ProgPoint to insert into the final allocated program listing.
|
||||
pub debug_annotations: std::collections::HashMap<ProgPoint, Vec<String>>,
|
||||
pub debug_annotations: hashbrown::HashMap<ProgPoint, Vec<String>>,
|
||||
pub annotations_enabled: bool,
|
||||
|
||||
// Cached allocation for `try_to_allocate_bundle_to_reg` to avoid allocating
|
||||
@@ -507,7 +494,7 @@ impl SpillSlotList {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PrioQueue {
|
||||
pub heap: std::collections::BinaryHeap<PrioQueueEntry>,
|
||||
pub heap: alloc::collections::BinaryHeap<PrioQueueEntry>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
@@ -546,28 +533,28 @@ impl LiveRangeKey {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::cmp::PartialEq for LiveRangeKey {
|
||||
impl core::cmp::PartialEq for LiveRangeKey {
|
||||
#[inline(always)]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.to > other.from && self.from < other.to
|
||||
}
|
||||
}
|
||||
impl std::cmp::Eq for LiveRangeKey {}
|
||||
impl std::cmp::PartialOrd for LiveRangeKey {
|
||||
impl core::cmp::Eq for LiveRangeKey {}
|
||||
impl core::cmp::PartialOrd for LiveRangeKey {
|
||||
#[inline(always)]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
impl std::cmp::Ord for LiveRangeKey {
|
||||
impl core::cmp::Ord for LiveRangeKey {
|
||||
#[inline(always)]
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
|
||||
if self.to <= other.from {
|
||||
std::cmp::Ordering::Less
|
||||
core::cmp::Ordering::Less
|
||||
} else if self.from >= other.to {
|
||||
std::cmp::Ordering::Greater
|
||||
core::cmp::Ordering::Greater
|
||||
} else {
|
||||
std::cmp::Ordering::Equal
|
||||
core::cmp::Ordering::Equal
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -577,7 +564,7 @@ pub struct PrioQueueComparator<'a> {
|
||||
}
|
||||
impl<'a> ContainerComparator for PrioQueueComparator<'a> {
|
||||
type Ix = LiveBundleIndex;
|
||||
fn compare(&self, a: Self::Ix, b: Self::Ix) -> std::cmp::Ordering {
|
||||
fn compare(&self, a: Self::Ix, b: Self::Ix) -> core::cmp::Ordering {
|
||||
self.prios[a.index()].cmp(&self.prios[b.index()])
|
||||
}
|
||||
}
|
||||
@@ -585,7 +572,7 @@ impl<'a> ContainerComparator for PrioQueueComparator<'a> {
|
||||
impl PrioQueue {
|
||||
pub fn new() -> Self {
|
||||
PrioQueue {
|
||||
heap: std::collections::BinaryHeap::new(),
|
||||
heap: alloc::collections::BinaryHeap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -628,9 +615,7 @@ pub struct InsertedMove {
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum InsertMovePrio {
|
||||
InEdgeMoves,
|
||||
BlockParam,
|
||||
Regular,
|
||||
PostRegular,
|
||||
MultiFixedRegInitial,
|
||||
MultiFixedRegSecondary,
|
||||
ReusedInput,
|
||||
@@ -660,10 +645,6 @@ pub struct Stats {
|
||||
pub livein_iterations: usize,
|
||||
pub initial_liverange_count: usize,
|
||||
pub merged_bundle_count: usize,
|
||||
pub prog_moves: usize,
|
||||
pub prog_moves_dead_src: usize,
|
||||
pub prog_move_merge_attempt: usize,
|
||||
pub prog_move_merge_success: usize,
|
||||
pub process_bundle_count: usize,
|
||||
pub process_bundle_reg_probes_fixed: usize,
|
||||
pub process_bundle_reg_success_fixed: usize,
|
||||
@@ -705,3 +686,34 @@ pub fn u64_key(b: u32, a: u32) -> u64 {
|
||||
/// Packs four 32-bit values into one 128-bit key.
///
/// `d` occupies the most significant 32 bits, followed by `c`, then `b`,
/// with `a` in the least significant 32 bits.
pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 {
    let low_half = u128::from(a) | (u128::from(b) << 32);
    let high_half = (u128::from(c) << 64) | (u128::from(d) << 96);
    high_half | low_half
}
|
||||
|
||||
pub struct Bitmap {
|
||||
storage: SmallVec<[u64; 2]>,
|
||||
}
|
||||
|
||||
impl Bitmap {
|
||||
pub fn init(entry_count: usize) -> Self {
|
||||
let u64_count = (entry_count + 63) / 64;
|
||||
let mut storage = SmallVec::<[u64; 2]>::with_capacity(u64_count);
|
||||
storage.resize(u64_count, 0);
|
||||
Self { storage }
|
||||
}
|
||||
|
||||
pub fn set(&mut self, idx: usize) {
|
||||
let storage_idx = idx / 64;
|
||||
let bit = 1u64 << (idx % 64);
|
||||
self.storage[storage_idx] |= bit;
|
||||
}
|
||||
|
||||
pub fn un_set(&mut self, idx: usize) {
|
||||
let storage_idx = idx / 64;
|
||||
let bit = 1u64 << (idx % 64);
|
||||
self.storage[storage_idx] &= !bit;
|
||||
}
|
||||
|
||||
pub fn is_set(&mut self, idx: usize) -> bool {
|
||||
let storage_idx = idx / 64;
|
||||
let bit = 1u64 << (idx % 64);
|
||||
(self.storage[storage_idx] & bit) != 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
//! Debugging output.
|
||||
|
||||
use alloc::string::ToString;
|
||||
use alloc::{format, vec};
|
||||
use alloc::{string::String, vec::Vec};
|
||||
|
||||
use super::Env;
|
||||
use crate::{Block, Function, ProgPoint};
|
||||
|
||||
|
||||
2563
src/ion/fast_alloc.rs
Normal file
2563
src/ion/fast_alloc.rs
Normal file
File diff suppressed because it is too large
Load Diff
594
src/ion/fast_alloc.rs.bak
Normal file
594
src/ion/fast_alloc.rs.bak
Normal file
@@ -0,0 +1,594 @@
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
use crate::{
|
||||
cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint,
|
||||
OperandKind, OperandPos, Output, PReg, ProgPoint, RegAllocError, RegClass, SpillSlot, VReg,
|
||||
};
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct VRegInfo {
|
||||
pub stack_slot: Option<u32>,
|
||||
pub alloc_at_inst: Inst,
|
||||
pub cur_reg: Option<PReg>,
|
||||
}
|
||||
|
||||
impl Default for VRegInfo {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
stack_slot: None,
|
||||
alloc_at_inst: Inst::invalid(),
|
||||
cur_reg: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct AllocState {
|
||||
pub vregs: Vec<VRegInfo>,
|
||||
pub preorder: Vec<Block>,
|
||||
pub allocs: Vec<Allocation>,
|
||||
pub inst_alloc_offsets: Vec<u32>,
|
||||
pub edits: Vec<(ProgPoint, Edit)>,
|
||||
pub cur_stack_slot_idx: u32,
|
||||
pub safepoint_slots: Vec<(ProgPoint, Allocation)>,
|
||||
}
|
||||
|
||||
impl AllocState {
|
||||
pub fn init<F: Function>(func: &F) -> Self {
|
||||
let mut vregs = Vec::with_capacity(func.num_vregs());
|
||||
vregs.resize(func.num_vregs(), VRegInfo::default());
|
||||
|
||||
let mut inst_alloc_offsets = Vec::with_capacity(func.num_insts());
|
||||
inst_alloc_offsets.resize(func.num_insts(), 0);
|
||||
|
||||
Self {
|
||||
vregs,
|
||||
preorder: Self::calc_preorder(func),
|
||||
allocs: Vec::new(),
|
||||
inst_alloc_offsets,
|
||||
edits: Vec::new(),
|
||||
cur_stack_slot_idx: 0,
|
||||
safepoint_slots: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn calc_preorder<F: Function>(func: &F) -> Vec<Block> {
|
||||
let entry = func.entry_block();
|
||||
let mut ret = vec![entry];
|
||||
|
||||
struct State<'a> {
|
||||
block: Block,
|
||||
succs: &'a [Block],
|
||||
next_succ: usize,
|
||||
}
|
||||
let mut stack: SmallVec<[State; 64]> = smallvec![];
|
||||
|
||||
stack.push(State {
|
||||
block: entry,
|
||||
succs: func.block_succs(entry),
|
||||
next_succ: 0,
|
||||
});
|
||||
|
||||
while let Some(ref mut state) = stack.last_mut() {
|
||||
if state.next_succ >= state.succs.len() {
|
||||
stack.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
let block = state.succs[state.next_succ];
|
||||
let succs = func.block_succs(block);
|
||||
ret.push(block);
|
||||
|
||||
state.next_succ += 1;
|
||||
if state.next_succ >= state.succs.len() {
|
||||
stack.pop();
|
||||
}
|
||||
|
||||
if !succs.is_empty() {
|
||||
stack.push(State {
|
||||
block,
|
||||
succs: func.block_succs(block),
|
||||
next_succ: 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run<F: Function>(
|
||||
func: &F,
|
||||
mach_env: &MachineEnv,
|
||||
cfg: CFGInfo,
|
||||
) -> Result<Output, RegAllocError> {
|
||||
let mut state = AllocState::init(func);
|
||||
|
||||
let reg_order_int = {
|
||||
let class = RegClass::Int as usize;
|
||||
let amount = mach_env.preferred_regs_by_class[class].len()
|
||||
+ mach_env.non_preferred_regs_by_class[class].len();
|
||||
let mut reg_order = Vec::with_capacity(amount);
|
||||
reg_order.extend_from_slice(&mach_env.preferred_regs_by_class[class]);
|
||||
reg_order.extend_from_slice(&mach_env.non_preferred_regs_by_class[class]);
|
||||
reg_order
|
||||
};
|
||||
|
||||
let reg_order_float = {
|
||||
let class = RegClass::Float as usize;
|
||||
let amount = mach_env.preferred_regs_by_class[class].len()
|
||||
+ mach_env.non_preferred_regs_by_class[class].len();
|
||||
let mut reg_order = Vec::with_capacity(amount);
|
||||
reg_order.extend_from_slice(&mach_env.preferred_regs_by_class[class]);
|
||||
reg_order.extend_from_slice(&mach_env.non_preferred_regs_by_class[class]);
|
||||
reg_order
|
||||
};
|
||||
|
||||
let slot_size_int = func.spillslot_size(RegClass::Int) as u32;
|
||||
let slot_size_float = func.spillslot_size(RegClass::Float) as u32;
|
||||
|
||||
// TODO: this currently does not handle multiple defs/uses of the same vreg properly
|
||||
|
||||
// TODO: move to AllocState, needs to also contain if this is a pseudo-preg and refers to the stack
|
||||
let mut preg_state: Vec<VReg> = Vec::with_capacity(PReg::NUM_INDEX);
|
||||
preg_state.resize(PReg::NUM_INDEX, VReg::invalid());
|
||||
|
||||
for block in &state.preorder {
|
||||
for inst in func.block_insns(*block).iter() {
|
||||
let operands = func.inst_operands(inst);
|
||||
let alloc_idx = state.allocs.len();
|
||||
state.inst_alloc_offsets[inst.index()] = alloc_idx as u32;
|
||||
|
||||
state
|
||||
.allocs
|
||||
.resize(alloc_idx + operands.len(), Allocation::none());
|
||||
|
||||
// both of these smallvecs could hold 16 entries
|
||||
// late uses may not be chosen as a target for writes
|
||||
let mut late_use_regs: SmallVec<[PReg; 4]> = smallvec![];
|
||||
let mut early_write_regs: SmallVec<[PReg; 4]> = smallvec![];
|
||||
|
||||
// TODO: wouldnt need this if we look up the inst a vreg was allocated at
|
||||
let mut regs_allocated: SmallVec<[PReg; 8]> = smallvec![];
|
||||
|
||||
let mut cur_idx_int = 0;
|
||||
let mut cur_idx_float = 0;
|
||||
let mut has_early_write = false;
|
||||
|
||||
// allocate uses; fixed reg first, then the others
|
||||
for (i, op) in operands.iter().enumerate() {
|
||||
if op.kind() != OperandKind::Use {
|
||||
if op.pos() != OperandPos::Early {
|
||||
continue;
|
||||
}
|
||||
has_early_write = true;
|
||||
}
|
||||
|
||||
let vreg = op.vreg();
|
||||
|
||||
match op.constraint() {
|
||||
OperandConstraint::Any | OperandConstraint::Reg | OperandConstraint::Stack => {
|
||||
continue
|
||||
}
|
||||
OperandConstraint::FixedReg(reg) => {
|
||||
if preg_state[reg.index()] != VReg::invalid()
|
||||
&& preg_state[reg.index()] != vreg
|
||||
{
|
||||
return Err(RegAllocError::TooManyLiveRegs);
|
||||
}
|
||||
|
||||
state.allocs[alloc_idx + i] = Allocation::reg(reg);
|
||||
preg_state[reg.index()] = vreg;
|
||||
state.vregs[vreg.vreg()].alloc_at_inst = inst;
|
||||
state.vregs[vreg.vreg()].cur_reg = Some(reg);
|
||||
regs_allocated.push(reg);
|
||||
if op.pos() == OperandPos::Late {
|
||||
late_use_regs.push(reg);
|
||||
}
|
||||
|
||||
if op.kind() == OperandKind::Use {
|
||||
// Early Defs do not need to be moved
|
||||
match state.vregs[vreg.vreg()].stack_slot {
|
||||
Some(slot) => {
|
||||
state.edits.push((
|
||||
ProgPoint::before(inst),
|
||||
Edit::Move {
|
||||
from: Allocation::stack(SpillSlot::new(slot as usize)),
|
||||
to: Allocation::reg(reg),
|
||||
},
|
||||
));
|
||||
}
|
||||
None => {
|
||||
return Err(RegAllocError::SSA(vreg, inst));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
early_write_regs.push(reg);
|
||||
let slot = match state.vregs[vreg.vreg()].stack_slot {
|
||||
Some(slot) => slot,
|
||||
None => {
|
||||
let size = if op.class() == RegClass::Int {
|
||||
slot_size_int
|
||||
} else {
|
||||
slot_size_float
|
||||
};
|
||||
let slot = state.cur_stack_slot_idx;
|
||||
state.cur_stack_slot_idx += size;
|
||||
state.vregs[vreg.vreg()].stack_slot = Some(slot);
|
||||
slot
|
||||
}
|
||||
};
|
||||
state.edits.push((
|
||||
ProgPoint::after(inst),
|
||||
Edit::Move {
|
||||
from: Allocation::reg(reg),
|
||||
to: Allocation::stack(SpillSlot::new(slot as usize)),
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
OperandConstraint::Reuse(_) => panic!("Reuse constraint for Use/Early-Write"),
|
||||
}
|
||||
}
|
||||
|
||||
for (i, op) in operands.iter().enumerate() {
|
||||
if op.kind() != OperandKind::Use {
|
||||
if op.pos() != OperandPos::Early {
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let vreg = op.vreg();
|
||||
let vreg_idx = vreg.vreg();
|
||||
|
||||
match op.constraint() {
|
||||
OperandConstraint::Stack => {
|
||||
// values need to be on stack already
|
||||
match state.vregs[vreg_idx].stack_slot {
|
||||
Some(slot) => {
|
||||
state.allocs[alloc_idx + i] =
|
||||
Allocation::stack(SpillSlot::new(slot as usize));
|
||||
}
|
||||
None => {
|
||||
if op.kind() == OperandKind::Use {
|
||||
return Err(RegAllocError::SSA(vreg, inst));
|
||||
}
|
||||
// alloc stack slot
|
||||
let size = if op.class() == RegClass::Int {
|
||||
slot_size_int
|
||||
} else {
|
||||
slot_size_float
|
||||
};
|
||||
let slot = state.cur_stack_slot_idx;
|
||||
state.cur_stack_slot_idx += size;
|
||||
state.allocs[alloc_idx + i] =
|
||||
Allocation::stack(SpillSlot::new(slot as usize));
|
||||
state.vregs[vreg_idx].stack_slot = Some(slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: do any on stack?
|
||||
OperandConstraint::Any | OperandConstraint::Reg => {
|
||||
// check if vreg has been allocated before
|
||||
let mut alloc = None;
|
||||
for (j, op) in operands.iter().enumerate() {
|
||||
if j >= i {
|
||||
break;
|
||||
}
|
||||
|
||||
if op.vreg() == vreg {
|
||||
alloc = Some(state.allocs[alloc_idx + j].clone());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(a) = alloc {
|
||||
state.allocs[alloc_idx + i] = a;
|
||||
continue;
|
||||
}
|
||||
|
||||
// find first non-allocated register
|
||||
let reg_order = if op.class() == RegClass::Int {
|
||||
®_order_int
|
||||
} else {
|
||||
®_order_float
|
||||
};
|
||||
let reg_order_idx = if op.class() == RegClass::Int {
|
||||
&mut cur_idx_int
|
||||
} else {
|
||||
&mut cur_idx_float
|
||||
};
|
||||
loop {
|
||||
if *reg_order_idx >= reg_order.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
if preg_state[reg_order[*reg_order_idx].index()] != VReg::invalid() {
|
||||
*reg_order_idx += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let reg = reg_order[*reg_order_idx];
|
||||
|
||||
// TODO: helper func for alloc
|
||||
state.allocs[alloc_idx + i] = Allocation::reg(reg);
|
||||
preg_state[reg.index()] = vreg;
|
||||
state.vregs[vreg.vreg()].alloc_at_inst = inst;
|
||||
state.vregs[vreg.vreg()].cur_reg = Some(reg);
|
||||
regs_allocated.push(reg);
|
||||
if op.pos() == OperandPos::Late {
|
||||
late_use_regs.push(reg);
|
||||
}
|
||||
|
||||
if op.kind() == OperandKind::Use {
|
||||
match state.vregs[vreg_idx].stack_slot {
|
||||
Some(slot) => {
|
||||
state.edits.push((
|
||||
ProgPoint::before(inst),
|
||||
Edit::Move {
|
||||
from: Allocation::stack(SpillSlot::new(
|
||||
slot as usize,
|
||||
)),
|
||||
to: Allocation::reg(reg),
|
||||
},
|
||||
));
|
||||
}
|
||||
None => {
|
||||
return Err(RegAllocError::SSA(vreg, inst));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
early_write_regs.push(reg);
|
||||
let slot = match state.vregs[vreg.vreg()].stack_slot {
|
||||
Some(slot) => slot,
|
||||
None => {
|
||||
let size = if op.class() == RegClass::Int {
|
||||
slot_size_int
|
||||
} else {
|
||||
slot_size_float
|
||||
};
|
||||
let slot = state.cur_stack_slot_idx;
|
||||
state.cur_stack_slot_idx += size;
|
||||
state.vregs[vreg.vreg()].stack_slot = Some(slot);
|
||||
slot
|
||||
}
|
||||
};
|
||||
state.edits.push((
|
||||
ProgPoint::after(inst),
|
||||
Edit::Move {
|
||||
from: Allocation::reg(reg),
|
||||
to: Allocation::stack(SpillSlot::new(slot as usize)),
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// No register available
|
||||
// TODO: try to evict vreg that does not need to be in a preg
|
||||
if op.constraint() == OperandConstraint::Reg {
|
||||
return Err(RegAllocError::TooManyLiveRegs);
|
||||
}
|
||||
|
||||
// any can live on the stack
|
||||
match state.vregs[vreg_idx].stack_slot {
|
||||
Some(slot) => {
|
||||
state.allocs[alloc_idx + i] =
|
||||
Allocation::stack(SpillSlot::new(slot as usize));
|
||||
}
|
||||
None => {
|
||||
if op.kind() == OperandKind::Use {
|
||||
return Err(RegAllocError::SSA(vreg, inst));
|
||||
}
|
||||
// alloc stack slot
|
||||
let size = if op.class() == RegClass::Int {
|
||||
slot_size_int
|
||||
} else {
|
||||
slot_size_float
|
||||
};
|
||||
let slot = state.cur_stack_slot_idx;
|
||||
state.cur_stack_slot_idx += size;
|
||||
state.allocs[alloc_idx + i] =
|
||||
Allocation::stack(SpillSlot::new(slot as usize));
|
||||
state.vregs[vreg_idx].stack_slot = Some(slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
OperandConstraint::FixedReg(_) => continue,
|
||||
OperandConstraint::Reuse(_) => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
// Handle writes
|
||||
for (i, op) in operands.iter().enumerate() {
|
||||
if op.kind() != OperandKind::Def || op.pos() == OperandPos::Early {
|
||||
continue;
|
||||
}
|
||||
|
||||
let vreg = op.vreg();
|
||||
let vreg_idx = vreg.vreg();
|
||||
|
||||
match op.constraint() {
|
||||
OperandConstraint::FixedReg(reg) => {
|
||||
let reg_idx = reg.index();
|
||||
|
||||
if preg_state[reg_idx] != VReg::invalid() {
|
||||
// if the register is occupied by a late use we abort
|
||||
// TODO: move the allocation, if possible
|
||||
if late_use_regs.contains(®) || early_write_regs.contains(®) {
|
||||
todo!("trying to allocate fixed reg def on late use/early write");
|
||||
}
|
||||
|
||||
// overwrite
|
||||
let vreg = preg_state[reg_idx].vreg();
|
||||
state.vregs[vreg].cur_reg = None;
|
||||
}
|
||||
|
||||
preg_state[reg_idx] = vreg;
|
||||
state.vregs[vreg_idx].cur_reg = Some(reg);
|
||||
state.allocs[alloc_idx + i] = Allocation::reg(reg);
|
||||
|
||||
match state.vregs[vreg_idx].stack_slot {
|
||||
None => {
|
||||
// alloc stack slot
|
||||
let size = if op.class() == RegClass::Int {
|
||||
slot_size_int
|
||||
} else {
|
||||
slot_size_float
|
||||
};
|
||||
let slot = state.cur_stack_slot_idx;
|
||||
state.cur_stack_slot_idx += size;
|
||||
state.vregs[vreg_idx].stack_slot = Some(slot);
|
||||
|
||||
// move to stack after inst
|
||||
state.edits.push((
|
||||
ProgPoint::after(inst),
|
||||
Edit::Move {
|
||||
from: Allocation::reg(reg),
|
||||
to: Allocation::stack(SpillSlot::new(slot as usize)),
|
||||
},
|
||||
));
|
||||
}
|
||||
Some(_) => {
|
||||
return Err(RegAllocError::SSA(vreg, inst));
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
|
||||
cur_idx_int = 0;
|
||||
cur_idx_float = 0;
|
||||
for (i, op) in operands.iter().enumerate() {
|
||||
if op.kind() != OperandKind::Def || op.pos() == OperandPos::Early {
|
||||
continue;
|
||||
}
|
||||
|
||||
let vreg = op.vreg();
|
||||
let vreg_idx = vreg.vreg();
|
||||
|
||||
match op.constraint() {
|
||||
OperandConstraint::Reg => {
|
||||
// find first non-allocated register
|
||||
let reg_order = if op.class() == RegClass::Int {
|
||||
®_order_int
|
||||
} else {
|
||||
®_order_float
|
||||
};
|
||||
let reg_order_idx = if op.class() == RegClass::Int {
|
||||
&mut cur_idx_int
|
||||
} else {
|
||||
&mut cur_idx_float
|
||||
};
|
||||
loop {
|
||||
if *reg_order_idx >= reg_order.len() {
|
||||
break;
|
||||
}
|
||||
let reg = reg_order[*reg_order_idx];
|
||||
|
||||
if late_use_regs.contains(®) || early_write_regs.contains(®) {
|
||||
*reg_order_idx += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// remove allocated
|
||||
if preg_state[reg.index()] != VReg::invalid() {
|
||||
let vreg_idx = preg_state[reg.index()].vreg();
|
||||
state.vregs[vreg_idx].cur_reg = None;
|
||||
} else {
|
||||
regs_allocated.push(reg);
|
||||
}
|
||||
|
||||
// TODO: helper func for alloc
|
||||
state.allocs[alloc_idx + i] = Allocation::reg(reg);
|
||||
preg_state[reg.index()] = vreg;
|
||||
state.vregs[vreg.vreg()].alloc_at_inst = inst;
|
||||
state.vregs[vreg.vreg()].cur_reg = Some(reg);
|
||||
|
||||
// alloc stack slot
|
||||
let size = if op.class() == RegClass::Int {
|
||||
slot_size_int
|
||||
} else {
|
||||
slot_size_float
|
||||
};
|
||||
let slot = state.cur_stack_slot_idx;
|
||||
state.cur_stack_slot_idx += size;
|
||||
state.vregs[vreg_idx].stack_slot = Some(slot);
|
||||
|
||||
match state.vregs[vreg_idx].stack_slot {
|
||||
Some(slot) => {
|
||||
state.edits.push((
|
||||
ProgPoint::after(inst),
|
||||
Edit::Move {
|
||||
from: Allocation::reg(reg),
|
||||
to: Allocation::stack(SpillSlot::new(slot as usize)),
|
||||
},
|
||||
));
|
||||
}
|
||||
None => {
|
||||
return Err(RegAllocError::SSA(vreg, inst));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No register available
|
||||
// TODO: try to evict vreg that does not need to be in a preg
|
||||
return Err(RegAllocError::TooManyLiveRegs);
|
||||
}
|
||||
OperandConstraint::Any | OperandConstraint::Stack => {
|
||||
// alloc stack slot
|
||||
let size = if op.class() == RegClass::Int {
|
||||
slot_size_int
|
||||
} else {
|
||||
slot_size_float
|
||||
};
|
||||
let slot = state.cur_stack_slot_idx;
|
||||
state.cur_stack_slot_idx += size;
|
||||
state.vregs[vreg_idx].stack_slot = Some(slot);
|
||||
state.allocs[alloc_idx + i] =
|
||||
Allocation::stack(SpillSlot::new(slot as usize));
|
||||
}
|
||||
OperandConstraint::Reuse(op_idx) => {
|
||||
let alloc = state.allocs[alloc_idx + op_idx];
|
||||
if alloc.is_none() || operands[op_idx].pos() == OperandPos::Late {
|
||||
panic!("Invalid reuse");
|
||||
}
|
||||
|
||||
state.allocs[alloc_idx + i] = alloc;
|
||||
if let Some(alloc) = alloc.as_stack() {
|
||||
state.vregs[vreg_idx].stack_slot = Some(alloc.index() as u32);
|
||||
state.vregs[operands[op_idx].vreg().vreg()].stack_slot = None;
|
||||
} else if let Some(reg) = alloc.as_reg() {
|
||||
state.vregs[operands[op_idx].vreg().vreg()].cur_reg = None;
|
||||
state.vregs[vreg_idx].cur_reg = Some(reg);
|
||||
|
||||
// alloc stack slot
|
||||
let size = if op.class() == RegClass::Int {
|
||||
slot_size_int
|
||||
} else {
|
||||
slot_size_float
|
||||
};
|
||||
let slot = state.cur_stack_slot_idx;
|
||||
state.cur_stack_slot_idx += size;
|
||||
state.vregs[vreg_idx].stack_slot = Some(slot);
|
||||
|
||||
// move to stack after inst
|
||||
state.edits.push((
|
||||
ProgPoint::after(inst),
|
||||
Edit::Move {
|
||||
from: Allocation::reg(reg),
|
||||
to: Allocation::stack(SpillSlot::new(slot as usize)),
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
OperandConstraint::FixedReg(_) => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
todo!("")
|
||||
}
|
||||
2232
src/ion/fast_alloc2.rs
Normal file
2232
src/ion/fast_alloc2.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -22,13 +22,15 @@ use crate::ion::data_structures::{
|
||||
BlockparamIn, BlockparamOut, FixedRegFixupLevel, MultiFixedRegFixup,
|
||||
};
|
||||
use crate::{
|
||||
Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
|
||||
OperandPos, PReg, ProgPoint, RegAllocError, VReg,
|
||||
Allocation, Block, Function, FxHashMap, FxHashSet, Inst, InstPosition, Operand,
|
||||
OperandConstraint, OperandKind, OperandPos, PReg, ProgPoint, RegAllocError, VReg,
|
||||
};
|
||||
use fxhash::{FxHashMap, FxHashSet};
|
||||
use alloc::collections::VecDeque;
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
use hashbrown::HashSet;
|
||||
use slice_group_by::GroupByMut;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::collections::{HashSet, VecDeque};
|
||||
|
||||
/// A spill weight computed for a certain Use.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
@@ -42,7 +44,7 @@ pub fn spill_weight_from_constraint(
|
||||
) -> SpillWeight {
|
||||
// A bonus of 1000 for one loop level, 4000 for two loop levels,
|
||||
// 16000 for three loop levels, etc. Avoids exponentiation.
|
||||
let loop_depth = std::cmp::min(10, loop_depth);
|
||||
let loop_depth = core::cmp::min(10, loop_depth);
|
||||
let hot_bonus: f32 = (0..loop_depth).fold(1000.0, |a, _| a * 4.0);
|
||||
let def_bonus: f32 = if is_def { 2000.0 } else { 0.0 };
|
||||
let constraint_bonus: f32 = match constraint {
|
||||
@@ -91,7 +93,7 @@ impl SpillWeight {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Add<SpillWeight> for SpillWeight {
|
||||
impl core::ops::Add<SpillWeight> for SpillWeight {
|
||||
type Output = SpillWeight;
|
||||
fn add(self, other: SpillWeight) -> Self {
|
||||
SpillWeight(self.0 + other.0)
|
||||
@@ -334,8 +336,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
workqueue_set.insert(block);
|
||||
}
|
||||
|
||||
while !workqueue.is_empty() {
|
||||
let block = workqueue.pop_front().unwrap();
|
||||
while let Some(block) = workqueue.pop_front() {
|
||||
workqueue_set.remove(&block);
|
||||
let insns = self.func.block_insns(block);
|
||||
|
||||
@@ -357,13 +358,6 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
|
||||
for inst in insns.rev().iter() {
|
||||
if let Some((src, dst)) = self.func.is_move(inst) {
|
||||
live.set(dst.vreg().vreg(), false);
|
||||
live.set(src.vreg().vreg(), true);
|
||||
self.observe_vreg_class(src.vreg());
|
||||
self.observe_vreg_class(dst.vreg());
|
||||
}
|
||||
|
||||
for pos in &[OperandPos::Late, OperandPos::Early] {
|
||||
for op in self.func.inst_operands(inst) {
|
||||
if op.as_fixed_nonallocatable().is_some() {
|
||||
@@ -520,147 +514,6 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a move, handle specially.
|
||||
if let Some((src, dst)) = self.func.is_move(inst) {
|
||||
// We can completely skip the move if it is
|
||||
// trivial (vreg to same vreg).
|
||||
if src.vreg() != dst.vreg() {
|
||||
trace!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst);
|
||||
|
||||
debug_assert_eq!(src.class(), dst.class());
|
||||
debug_assert_eq!(src.kind(), OperandKind::Use);
|
||||
debug_assert_eq!(src.pos(), OperandPos::Early);
|
||||
debug_assert_eq!(dst.kind(), OperandKind::Def);
|
||||
debug_assert_eq!(dst.pos(), OperandPos::Late);
|
||||
|
||||
// Redefine src and dst operands to have
|
||||
// positions of After and Before respectively
|
||||
// (see note below), and to have Any
|
||||
// constraints if they were originally Reg.
|
||||
let src_constraint = match src.constraint() {
|
||||
OperandConstraint::Reg => OperandConstraint::Any,
|
||||
x => x,
|
||||
};
|
||||
let dst_constraint = match dst.constraint() {
|
||||
OperandConstraint::Reg => OperandConstraint::Any,
|
||||
x => x,
|
||||
};
|
||||
let src = Operand::new(
|
||||
src.vreg(),
|
||||
src_constraint,
|
||||
OperandKind::Use,
|
||||
OperandPos::Late,
|
||||
);
|
||||
let dst = Operand::new(
|
||||
dst.vreg(),
|
||||
dst_constraint,
|
||||
OperandKind::Def,
|
||||
OperandPos::Early,
|
||||
);
|
||||
|
||||
if self.annotations_enabled {
|
||||
self.annotate(
|
||||
ProgPoint::after(inst),
|
||||
format!(
|
||||
" prog-move v{} ({:?}) -> v{} ({:?})",
|
||||
src.vreg().vreg(),
|
||||
src_constraint,
|
||||
dst.vreg().vreg(),
|
||||
dst_constraint,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
// N.B.: in order to integrate with the move
|
||||
// resolution that joins LRs in general, we
|
||||
// conceptually treat the move as happening
|
||||
// between the move inst's After and the next
|
||||
// inst's Before. Thus the src LR goes up to
|
||||
// (exclusive) next-inst-pre, and the dst LR
|
||||
// starts at next-inst-pre. We have to take
|
||||
// care in our move insertion to handle this
|
||||
// like other inter-inst moves, i.e., at
|
||||
// `Regular` priority, so it properly happens
|
||||
// in parallel with other inter-LR moves.
|
||||
//
|
||||
// Why the progpoint between move and next
|
||||
// inst, and not the progpoint between prev
|
||||
// inst and move? Because a move can be the
|
||||
// first inst in a block, but cannot be the
|
||||
// last; so the following progpoint is always
|
||||
// within the same block, while the previous
|
||||
// one may be an inter-block point (and the
|
||||
// After of the prev inst in a different
|
||||
// block).
|
||||
|
||||
// Handle the def w.r.t. liveranges: trim the
|
||||
// start of the range and mark it dead at this
|
||||
// point in our backward scan.
|
||||
let pos = ProgPoint::before(inst.next());
|
||||
let mut dst_lr = vreg_ranges[dst.vreg().vreg()];
|
||||
if !live.get(dst.vreg().vreg()) {
|
||||
let from = pos;
|
||||
let to = pos.next();
|
||||
dst_lr = self.add_liverange_to_vreg(
|
||||
VRegIndex::new(dst.vreg().vreg()),
|
||||
CodeRange { from, to },
|
||||
);
|
||||
trace!(" -> invalid LR for def; created {:?}", dst_lr);
|
||||
}
|
||||
trace!(" -> has existing LR {:?}", dst_lr);
|
||||
// Trim the LR to start here.
|
||||
if self.ranges[dst_lr.index()].range.from
|
||||
== self.cfginfo.block_entry[block.index()]
|
||||
{
|
||||
trace!(" -> started at block start; trimming to {:?}", pos);
|
||||
self.ranges[dst_lr.index()].range.from = pos;
|
||||
}
|
||||
self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef);
|
||||
live.set(dst.vreg().vreg(), false);
|
||||
vreg_ranges[dst.vreg().vreg()] = LiveRangeIndex::invalid();
|
||||
|
||||
// Handle the use w.r.t. liveranges: make it live
|
||||
// and create an initial LR back to the start of
|
||||
// the block.
|
||||
let pos = ProgPoint::after(inst);
|
||||
let src_lr = if !live.get(src.vreg().vreg()) {
|
||||
let range = CodeRange {
|
||||
from: self.cfginfo.block_entry[block.index()],
|
||||
to: pos.next(),
|
||||
};
|
||||
let src_lr = self
|
||||
.add_liverange_to_vreg(VRegIndex::new(src.vreg().vreg()), range);
|
||||
vreg_ranges[src.vreg().vreg()] = src_lr;
|
||||
src_lr
|
||||
} else {
|
||||
vreg_ranges[src.vreg().vreg()]
|
||||
};
|
||||
|
||||
trace!(" -> src LR {:?}", src_lr);
|
||||
|
||||
// Add to live-set.
|
||||
let src_is_dead_after_move = !live.get(src.vreg().vreg());
|
||||
live.set(src.vreg().vreg(), true);
|
||||
|
||||
// Add to program-moves lists.
|
||||
self.prog_move_srcs.push((
|
||||
(VRegIndex::new(src.vreg().vreg()), inst),
|
||||
Allocation::none(),
|
||||
));
|
||||
self.prog_move_dsts.push((
|
||||
(VRegIndex::new(dst.vreg().vreg()), inst.next()),
|
||||
Allocation::none(),
|
||||
));
|
||||
self.stats.prog_moves += 1;
|
||||
if src_is_dead_after_move {
|
||||
self.stats.prog_moves_dead_src += 1;
|
||||
self.prog_move_merges.push((src_lr, dst_lr));
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Preprocess defs and uses. Specifically, if there
|
||||
// are any fixed-reg-constrained defs at Late position
|
||||
// and fixed-reg-constrained uses at Early position
|
||||
@@ -959,12 +812,9 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
}
|
||||
|
||||
for range in 0..self.ranges.len() {
|
||||
self.ranges[range].uses.reverse();
|
||||
debug_assert!(self.ranges[range]
|
||||
.uses
|
||||
.windows(2)
|
||||
.all(|win| win[0].pos <= win[1].pos));
|
||||
for range in &mut self.ranges {
|
||||
range.uses.reverse();
|
||||
debug_assert!(range.uses.windows(2).all(|win| win[0].pos <= win[1].pos));
|
||||
}
|
||||
|
||||
// Insert safepoint virtual stack uses, if needed.
|
||||
@@ -1019,11 +869,6 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
|
||||
self.blockparam_ins.sort_unstable_by_key(|x| x.key());
|
||||
self.blockparam_outs.sort_unstable_by_key(|x| x.key());
|
||||
self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos);
|
||||
self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos);
|
||||
|
||||
trace!("prog_move_srcs = {:?}", self.prog_move_srcs);
|
||||
trace!("prog_move_dsts = {:?}", self.prog_move_dsts);
|
||||
|
||||
self.stats.initial_liverange_count = self.ranges.len();
|
||||
self.stats.blockparam_ins_count = self.blockparam_ins.len();
|
||||
@@ -1032,7 +877,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
|
||||
pub fn fixup_multi_fixed_vregs(&mut self) {
|
||||
// Do a fixed-reg cleanup pass: if there are any LiveRanges with
|
||||
// multiple uses (or defs) at the same ProgPoint and there is
|
||||
// multiple uses at the same ProgPoint and there is
|
||||
// more than one FixedReg constraint at that ProgPoint, we
|
||||
// need to record all but one of them in a special fixup list
|
||||
// and handle them later; otherwise, bundle-splitting to
|
||||
@@ -1154,15 +999,13 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
}
|
||||
|
||||
for &(clobber, pos) in &extra_clobbers {
|
||||
for (clobber, pos) in extra_clobbers.drain(..) {
|
||||
let range = CodeRange {
|
||||
from: pos,
|
||||
to: pos.next(),
|
||||
};
|
||||
self.add_liverange_to_preg(range, clobber);
|
||||
}
|
||||
|
||||
extra_clobbers.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ use super::{
|
||||
use crate::{
|
||||
ion::data_structures::BlockparamOut, Function, Inst, OperandConstraint, OperandKind, PReg,
|
||||
};
|
||||
use alloc::format;
|
||||
use smallvec::smallvec;
|
||||
|
||||
impl<'a, F: Function> Env<'a, F> {
|
||||
@@ -132,7 +133,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// `to` bundle is empty -- just move the list over from
|
||||
// `from` and set `bundle` up-link on all ranges.
|
||||
trace!(" -> to bundle{} is empty; trivial merge", to.index());
|
||||
let list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]);
|
||||
let list = core::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]);
|
||||
for entry in &list {
|
||||
self.ranges[entry.index.index()].bundle = to;
|
||||
|
||||
@@ -170,7 +171,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// Two non-empty lists of LiveRanges: concatenate and
|
||||
// sort. This is faster than a mergesort-like merge into a new
|
||||
// list, empirically.
|
||||
let from_list = std::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]);
|
||||
let from_list = core::mem::replace(&mut self.bundles[from.index()].ranges, smallvec![]);
|
||||
for entry in &from_list {
|
||||
self.ranges[entry.index.index()].bundle = to;
|
||||
}
|
||||
@@ -213,7 +214,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
|
||||
if self.bundles[from.index()].spillset != self.bundles[to.index()].spillset {
|
||||
let from_vregs = std::mem::replace(
|
||||
let from_vregs = core::mem::replace(
|
||||
&mut self.spillsets[self.bundles[from.index()].spillset.index()].vregs,
|
||||
smallvec![],
|
||||
);
|
||||
@@ -351,28 +352,6 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
self.merge_bundles(from_bundle, to_bundle);
|
||||
}
|
||||
|
||||
// Attempt to merge move srcs/dsts.
|
||||
for i in 0..self.prog_move_merges.len() {
|
||||
let (src, dst) = self.prog_move_merges[i];
|
||||
trace!("trying to merge move src LR {:?} to dst LR {:?}", src, dst);
|
||||
let src = self.resolve_merged_lr(src);
|
||||
let dst = self.resolve_merged_lr(dst);
|
||||
trace!(
|
||||
"resolved LR-construction merging chains: move-merge is now src LR {:?} to dst LR {:?}",
|
||||
src,
|
||||
dst
|
||||
);
|
||||
|
||||
let src_bundle = self.ranges[src.index()].bundle;
|
||||
debug_assert!(src_bundle.is_valid());
|
||||
let dest_bundle = self.ranges[dst.index()].bundle;
|
||||
debug_assert!(dest_bundle.is_valid());
|
||||
self.stats.prog_move_merge_attempt += 1;
|
||||
if self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle) {
|
||||
self.stats.prog_move_merge_success += 1;
|
||||
}
|
||||
}
|
||||
|
||||
trace!("done merging bundles");
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,9 @@
|
||||
use crate::cfg::CFGInfo;
|
||||
use crate::ssa::validate_ssa;
|
||||
use crate::{Function, MachineEnv, Output, PReg, ProgPoint, RegAllocError, RegClass};
|
||||
use std::collections::HashMap;
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
use hashbrown::HashMap;
|
||||
|
||||
pub(crate) mod data_structures;
|
||||
pub use data_structures::Stats;
|
||||
@@ -34,6 +36,8 @@ pub(crate) mod process;
|
||||
use process::*;
|
||||
use smallvec::smallvec;
|
||||
pub(crate) mod dump;
|
||||
mod fast_alloc;
|
||||
mod fast_alloc2;
|
||||
pub(crate) mod moves;
|
||||
pub(crate) mod spill;
|
||||
pub(crate) mod stackmap;
|
||||
@@ -71,10 +75,6 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
extra_spillslots_by_class: [smallvec![], smallvec![]],
|
||||
preferred_victim_by_class: [PReg::invalid(), PReg::invalid()],
|
||||
|
||||
prog_move_srcs: Vec::with_capacity(n / 2),
|
||||
prog_move_dsts: Vec::with_capacity(n / 2),
|
||||
prog_move_merges: Vec::with_capacity(n / 2),
|
||||
|
||||
multi_fixed_reg_fixups: vec![],
|
||||
inserted_moves: vec![],
|
||||
edits: Vec::with_capacity(n),
|
||||
@@ -86,7 +86,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
|
||||
stats: Stats::default(),
|
||||
|
||||
debug_annotations: std::collections::HashMap::new(),
|
||||
debug_annotations: hashbrown::HashMap::new(),
|
||||
annotations_enabled,
|
||||
|
||||
conflict_set: Default::default(),
|
||||
@@ -122,14 +122,24 @@ pub fn run<F: Function>(
|
||||
mach_env: &MachineEnv,
|
||||
enable_annotations: bool,
|
||||
enable_ssa_checker: bool,
|
||||
use_fast_alloc: bool,
|
||||
) -> Result<Output, RegAllocError> {
|
||||
let cfginfo = CFGInfo::new(func)?;
|
||||
let mut cfginfo = None;
|
||||
|
||||
if enable_ssa_checker {
|
||||
validate_ssa(func, &cfginfo)?;
|
||||
cfginfo = Some(CFGInfo::new(func, !use_fast_alloc)?);
|
||||
validate_ssa(func, cfginfo.as_ref().unwrap())?;
|
||||
}
|
||||
|
||||
let mut env = Env::new(func, mach_env, cfginfo, enable_annotations);
|
||||
if use_fast_alloc {
|
||||
return fast_alloc::run(func, mach_env);
|
||||
}
|
||||
|
||||
if cfginfo.is_none() {
|
||||
cfginfo = Some(CFGInfo::new(func, !use_fast_alloc)?);
|
||||
}
|
||||
|
||||
let mut env = Env::new(func, mach_env, cfginfo.take().unwrap(), enable_annotations);
|
||||
env.init()?;
|
||||
|
||||
env.run()?;
|
||||
|
||||
145
src/ion/moves.rs
145
src/ion/moves.rs
@@ -22,12 +22,13 @@ use crate::ion::data_structures::{
|
||||
use crate::ion::reg_traversal::RegTraversalIter;
|
||||
use crate::moves::{MoveAndScratchResolver, ParallelMoves};
|
||||
use crate::{
|
||||
Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
|
||||
OperandPos, PReg, ProgPoint, RegClass, SpillSlot, VReg,
|
||||
Allocation, Block, Edit, Function, FxHashMap, Inst, InstPosition, OperandConstraint,
|
||||
OperandKind, OperandPos, PReg, ProgPoint, RegClass, SpillSlot, VReg,
|
||||
};
|
||||
use fxhash::FxHashMap;
|
||||
use alloc::vec::Vec;
|
||||
use alloc::{format, vec};
|
||||
use core::fmt::Debug;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt::Debug;
|
||||
|
||||
impl<'a, F: Function> Env<'a, F> {
|
||||
pub fn is_start_of_block(&self, pos: ProgPoint) -> bool {
|
||||
@@ -179,8 +180,6 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
|
||||
let mut blockparam_in_idx = 0;
|
||||
let mut blockparam_out_idx = 0;
|
||||
let mut prog_move_src_idx = 0;
|
||||
let mut prog_move_dst_idx = 0;
|
||||
for vreg in 0..self.vregs.len() {
|
||||
let vreg = VRegIndex::new(vreg);
|
||||
if !self.is_vreg_used(vreg) {
|
||||
@@ -190,7 +189,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// For each range in each vreg, insert moves or
|
||||
// half-moves. We also scan over `blockparam_ins` and
|
||||
// `blockparam_outs`, which are sorted by (block, vreg),
|
||||
// and over program-move srcs/dsts to fill in allocations.
|
||||
// to fill in allocations.
|
||||
let mut prev = LiveRangeIndex::invalid();
|
||||
for range_idx in 0..self.vregs[vreg.index()].ranges.len() {
|
||||
let entry = self.vregs[vreg.index()].ranges[range_idx];
|
||||
@@ -496,9 +495,9 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// this case returns the index of the first
|
||||
// entry that is greater as an `Err`.
|
||||
if label_vreg.vreg() < vreg.index() {
|
||||
std::cmp::Ordering::Less
|
||||
core::cmp::Ordering::Less
|
||||
} else {
|
||||
std::cmp::Ordering::Greater
|
||||
core::cmp::Ordering::Greater
|
||||
}
|
||||
})
|
||||
.unwrap_err();
|
||||
@@ -517,96 +516,13 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
continue;
|
||||
}
|
||||
|
||||
let from = std::cmp::max(label_from, range.from);
|
||||
let to = std::cmp::min(label_to, range.to);
|
||||
let from = core::cmp::max(label_from, range.from);
|
||||
let to = core::cmp::min(label_to, range.to);
|
||||
|
||||
self.debug_locations.push((label, from, to, alloc));
|
||||
}
|
||||
}
|
||||
|
||||
// Scan over program move srcs/dsts to fill in allocations.
|
||||
|
||||
// Move srcs happen at `After` of a given
|
||||
// inst. Compute [from, to) semi-inclusive range of
|
||||
// inst indices for which we should fill in the source
|
||||
// with this LR's allocation.
|
||||
//
|
||||
// range from inst-Before or inst-After covers cur
|
||||
// inst's After; so includes move srcs from inst.
|
||||
let move_src_start = (vreg, range.from.inst());
|
||||
// range to (exclusive) inst-Before or inst-After
|
||||
// covers only prev inst's After; so includes move
|
||||
// srcs to (exclusive) inst.
|
||||
let move_src_end = (vreg, range.to.inst());
|
||||
trace!(
|
||||
"vreg {:?} range {:?}: looking for program-move sources from {:?} to {:?}",
|
||||
vreg,
|
||||
range,
|
||||
move_src_start,
|
||||
move_src_end
|
||||
);
|
||||
while prog_move_src_idx < self.prog_move_srcs.len()
|
||||
&& self.prog_move_srcs[prog_move_src_idx].0 < move_src_start
|
||||
{
|
||||
trace!(" -> skipping idx {}", prog_move_src_idx);
|
||||
prog_move_src_idx += 1;
|
||||
}
|
||||
while prog_move_src_idx < self.prog_move_srcs.len()
|
||||
&& self.prog_move_srcs[prog_move_src_idx].0 < move_src_end
|
||||
{
|
||||
trace!(
|
||||
" -> setting idx {} ({:?}) to alloc {:?}",
|
||||
prog_move_src_idx,
|
||||
self.prog_move_srcs[prog_move_src_idx].0,
|
||||
alloc
|
||||
);
|
||||
self.prog_move_srcs[prog_move_src_idx].1 = alloc;
|
||||
prog_move_src_idx += 1;
|
||||
}
|
||||
|
||||
// move dsts happen at Before point.
|
||||
//
|
||||
// Range from inst-Before includes cur inst, while inst-After includes only next inst.
|
||||
let move_dst_start = if range.from.pos() == InstPosition::Before {
|
||||
(vreg, range.from.inst())
|
||||
} else {
|
||||
(vreg, range.from.inst().next())
|
||||
};
|
||||
// Range to (exclusive) inst-Before includes prev
|
||||
// inst, so to (exclusive) cur inst; range to
|
||||
// (exclusive) inst-After includes cur inst, so to
|
||||
// (exclusive) next inst.
|
||||
let move_dst_end = if range.to.pos() == InstPosition::Before {
|
||||
(vreg, range.to.inst())
|
||||
} else {
|
||||
(vreg, range.to.inst().next())
|
||||
};
|
||||
trace!(
|
||||
"vreg {:?} range {:?}: looking for program-move dests from {:?} to {:?}",
|
||||
vreg,
|
||||
range,
|
||||
move_dst_start,
|
||||
move_dst_end
|
||||
);
|
||||
while prog_move_dst_idx < self.prog_move_dsts.len()
|
||||
&& self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_start
|
||||
{
|
||||
trace!(" -> skipping idx {}", prog_move_dst_idx);
|
||||
prog_move_dst_idx += 1;
|
||||
}
|
||||
while prog_move_dst_idx < self.prog_move_dsts.len()
|
||||
&& self.prog_move_dsts[prog_move_dst_idx].0 < move_dst_end
|
||||
{
|
||||
trace!(
|
||||
" -> setting idx {} ({:?}) to alloc {:?}",
|
||||
prog_move_dst_idx,
|
||||
self.prog_move_dsts[prog_move_dst_idx].0,
|
||||
alloc
|
||||
);
|
||||
self.prog_move_dsts[prog_move_dst_idx].1 = alloc;
|
||||
prog_move_dst_idx += 1;
|
||||
}
|
||||
|
||||
prev = entry.index;
|
||||
}
|
||||
}
|
||||
@@ -714,7 +630,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
|
||||
// Handle multi-fixed-reg constraints by copying.
|
||||
for fixup in std::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) {
|
||||
for fixup in core::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) {
|
||||
let from_alloc = self.get_alloc(fixup.pos.inst(), fixup.from_slot as usize);
|
||||
let to_alloc = Allocation::reg(PReg::from_index(fixup.to_preg.index()));
|
||||
trace!(
|
||||
@@ -820,42 +736,6 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
}
|
||||
|
||||
// Sort the prog-moves lists and insert moves to reify the
|
||||
// input program's move operations.
|
||||
self.prog_move_srcs
|
||||
.sort_unstable_by_key(|((_, inst), _)| *inst);
|
||||
self.prog_move_dsts
|
||||
.sort_unstable_by_key(|((_, inst), _)| inst.prev());
|
||||
let prog_move_srcs = std::mem::replace(&mut self.prog_move_srcs, vec![]);
|
||||
let prog_move_dsts = std::mem::replace(&mut self.prog_move_dsts, vec![]);
|
||||
debug_assert_eq!(prog_move_srcs.len(), prog_move_dsts.len());
|
||||
for (&((_, from_inst), from_alloc), &((to_vreg, to_inst), to_alloc)) in
|
||||
prog_move_srcs.iter().zip(prog_move_dsts.iter())
|
||||
{
|
||||
trace!(
|
||||
"program move at inst {:?}: alloc {:?} -> {:?} (v{})",
|
||||
from_inst,
|
||||
from_alloc,
|
||||
to_alloc,
|
||||
to_vreg.index(),
|
||||
);
|
||||
debug_assert!(from_alloc.is_some());
|
||||
debug_assert!(to_alloc.is_some());
|
||||
debug_assert_eq!(from_inst, to_inst.prev());
|
||||
// N.B.: these moves happen with the *same* priority as
|
||||
// LR-to-LR moves, because they work just like them: they
|
||||
// connect a use at one progpoint (move-After) with a def
|
||||
// at an adjacent progpoint (move+1-Before), so they must
|
||||
// happen in parallel with all other LR-to-LR moves.
|
||||
self.insert_move(
|
||||
ProgPoint::before(to_inst),
|
||||
InsertMovePrio::Regular,
|
||||
from_alloc,
|
||||
to_alloc,
|
||||
self.vreg(to_vreg),
|
||||
);
|
||||
}
|
||||
|
||||
// Sort the debug-locations vector; we provide this
|
||||
// invariant to the client.
|
||||
self.debug_locations.sort_unstable();
|
||||
@@ -986,6 +866,9 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
to: pos_prio.pos.next(),
|
||||
});
|
||||
let get_reg = || {
|
||||
if let Some(reg) = self.env.scratch_by_class[regclass as usize] {
|
||||
return Some(Allocation::reg(reg));
|
||||
}
|
||||
while let Some(preg) = scratch_iter.next() {
|
||||
if !self.pregs[preg.index()]
|
||||
.allocations
|
||||
|
||||
@@ -22,12 +22,11 @@ use crate::{
|
||||
CodeRange, BUNDLE_MAX_NORMAL_SPILL_WEIGHT, MAX_SPLITS_PER_SPILLSET,
|
||||
MINIMAL_BUNDLE_SPILL_WEIGHT, MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT,
|
||||
},
|
||||
Allocation, Function, Inst, InstPosition, OperandConstraint, OperandKind, PReg, ProgPoint,
|
||||
RegAllocError,
|
||||
Allocation, Function, FxHashSet, Inst, InstPosition, OperandConstraint, OperandKind, PReg,
|
||||
ProgPoint, RegAllocError,
|
||||
};
|
||||
use fxhash::FxHashSet;
|
||||
use core::fmt::Debug;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt::Debug;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum AllocRegResult {
|
||||
@@ -159,7 +158,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
trace!(" -> conflict bundle {:?}", conflict_bundle);
|
||||
if self.conflict_set.insert(conflict_bundle) {
|
||||
conflicts.push(conflict_bundle);
|
||||
max_conflict_weight = std::cmp::max(
|
||||
max_conflict_weight = core::cmp::max(
|
||||
max_conflict_weight,
|
||||
self.bundles[conflict_bundle.index()].cached_spill_weight(),
|
||||
);
|
||||
@@ -172,7 +171,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
|
||||
if first_conflict.is_none() {
|
||||
first_conflict = Some(ProgPoint::from_index(std::cmp::max(
|
||||
first_conflict = Some(ProgPoint::from_index(core::cmp::max(
|
||||
preg_key.from,
|
||||
key.from,
|
||||
)));
|
||||
@@ -334,7 +333,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
self.bundles[bundle.index()].prio,
|
||||
final_weight
|
||||
);
|
||||
std::cmp::min(BUNDLE_MAX_NORMAL_SPILL_WEIGHT, final_weight)
|
||||
core::cmp::min(BUNDLE_MAX_NORMAL_SPILL_WEIGHT, final_weight)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -824,7 +823,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// (up to the Before of the next inst), *unless*
|
||||
// the original LR was only over the Before (up to
|
||||
// the After) of this inst.
|
||||
let to = std::cmp::min(ProgPoint::before(u.pos.inst().next()), lr_to);
|
||||
let to = core::cmp::min(ProgPoint::before(u.pos.inst().next()), lr_to);
|
||||
|
||||
// If the last bundle was at the same inst, add a new
|
||||
// LR to the same bundle; otherwise, create a LR and a
|
||||
@@ -863,7 +862,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
|
||||
// Otherwise, create a new LR.
|
||||
let pos = ProgPoint::before(u.pos.inst());
|
||||
let pos = std::cmp::max(lr_from, pos);
|
||||
let pos = core::cmp::max(lr_from, pos);
|
||||
let cr = CodeRange { from: pos, to };
|
||||
let lr = self.create_liverange(cr);
|
||||
new_lrs.push((vreg, lr));
|
||||
@@ -1036,7 +1035,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
self.get_or_create_spill_bundle(bundle, /* create_if_absent = */ false)
|
||||
{
|
||||
let mut list =
|
||||
std::mem::replace(&mut self.bundles[bundle.index()].ranges, smallvec![]);
|
||||
core::mem::replace(&mut self.bundles[bundle.index()].ranges, smallvec![]);
|
||||
for entry in &list {
|
||||
self.ranges[entry.index.index()].bundle = spill;
|
||||
}
|
||||
@@ -1107,7 +1106,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
lowest_cost_evict_conflict_cost,
|
||||
lowest_cost_split_conflict_cost,
|
||||
) {
|
||||
(Some(a), Some(b)) => Some(std::cmp::max(a, b)),
|
||||
(Some(a), Some(b)) => Some(core::cmp::max(a, b)),
|
||||
_ => None,
|
||||
};
|
||||
match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, scan_limit_cost) {
|
||||
@@ -1291,7 +1290,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
);
|
||||
let bundle_start = self.bundles[bundle.index()].ranges[0].range.from;
|
||||
let mut split_at_point =
|
||||
std::cmp::max(lowest_cost_split_conflict_point, bundle_start);
|
||||
core::cmp::max(lowest_cost_split_conflict_point, bundle_start);
|
||||
let requeue_with_reg = lowest_cost_split_conflict_reg;
|
||||
|
||||
// Adjust `split_at_point` if it is within a deeper loop
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
//! Redundant-move elimination.
|
||||
|
||||
use crate::{Allocation, VReg};
|
||||
use fxhash::FxHashMap;
|
||||
use crate::{Allocation, FxHashMap, VReg};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
@@ -112,9 +111,9 @@ impl RedundantMoveEliminator {
|
||||
pub fn clear_alloc(&mut self, alloc: Allocation) {
|
||||
trace!(" redundant move eliminator: clear {:?}", alloc);
|
||||
if let Some(ref mut existing_copies) = self.reverse_allocs.get_mut(&alloc) {
|
||||
for to_inval in existing_copies.iter() {
|
||||
for to_inval in existing_copies.drain(..) {
|
||||
trace!(" -> clear existing copy: {:?}", to_inval);
|
||||
if let Some(val) = self.allocs.get_mut(to_inval) {
|
||||
if let Some(val) = self.allocs.get_mut(&to_inval) {
|
||||
match val {
|
||||
RedundantMoveState::Copy(_, Some(vreg)) => {
|
||||
*val = RedundantMoveState::Orig(*vreg);
|
||||
@@ -122,9 +121,8 @@ impl RedundantMoveEliminator {
|
||||
_ => *val = RedundantMoveState::None,
|
||||
}
|
||||
}
|
||||
self.allocs.remove(to_inval);
|
||||
self.allocs.remove(&to_inval);
|
||||
}
|
||||
existing_copies.clear();
|
||||
}
|
||||
self.allocs.remove(&alloc);
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ impl<'a> RegTraversalIter<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::iter::Iterator for RegTraversalIter<'a> {
|
||||
impl<'a> core::iter::Iterator for RegTraversalIter<'a> {
|
||||
type Item = PReg;
|
||||
|
||||
fn next(&mut self) -> Option<PReg> {
|
||||
|
||||
@@ -138,7 +138,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
let mut success = false;
|
||||
// Never probe the same element more than once: limit the
|
||||
// attempt count to the number of slots in existence.
|
||||
for _attempt in 0..std::cmp::min(self.slots_by_size[size].slots.len(), MAX_ATTEMPTS) {
|
||||
for _attempt in 0..core::cmp::min(self.slots_by_size[size].slots.len(), MAX_ATTEMPTS) {
|
||||
// Note: this indexing of `slots` is always valid
|
||||
// because either the `slots` list is empty and the
|
||||
// iteration limit above consequently means we don't
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
|
||||
//! Stackmap computation.
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use super::{Env, ProgPoint, VRegIndex};
|
||||
use crate::{ion::data_structures::u64_key, Function};
|
||||
|
||||
|
||||
114
src/lib.rs
114
src/lib.rs
@@ -11,6 +11,12 @@
|
||||
*/
|
||||
|
||||
#![allow(dead_code)]
|
||||
#![no_std]
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
extern crate std;
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
// Even when trace logging is disabled, the trace macro has a significant
|
||||
// performance cost so we disable it in release builds.
|
||||
@@ -28,6 +34,11 @@ macro_rules! trace_enabled {
|
||||
};
|
||||
}
|
||||
|
||||
use core::hash::BuildHasherDefault;
|
||||
use rustc_hash::FxHasher;
|
||||
type FxHashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
|
||||
type FxHashSet<V> = hashbrown::HashSet<V, BuildHasherDefault<FxHasher>>;
|
||||
|
||||
pub(crate) mod cfg;
|
||||
pub(crate) mod domtree;
|
||||
pub mod indexset;
|
||||
@@ -38,6 +49,8 @@ pub mod ssa;
|
||||
|
||||
#[macro_use]
|
||||
mod index;
|
||||
|
||||
use alloc::vec::Vec;
|
||||
pub use index::{Block, Inst, InstRange, InstRangeIter};
|
||||
|
||||
pub mod checker;
|
||||
@@ -142,8 +155,8 @@ impl PReg {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for PReg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Debug for PReg {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"PReg(hw = {}, class = {:?}, index = {})",
|
||||
@@ -154,8 +167,8 @@ impl std::fmt::Debug for PReg {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for PReg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Display for PReg {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
let class = match self.class() {
|
||||
RegClass::Int => "i",
|
||||
RegClass::Float => "f",
|
||||
@@ -266,8 +279,7 @@ impl From<&MachineEnv> for PRegSet {
|
||||
/// A virtual register. Contains a virtual register number and a
|
||||
/// class.
|
||||
///
|
||||
/// A virtual register ("vreg") corresponds to an SSA value for SSA
|
||||
/// input, or just a register when we allow for non-SSA input. All
|
||||
/// A virtual register ("vreg") corresponds to an SSA value. All
|
||||
/// dataflow in the input program is specified via flow through a
|
||||
/// virtual register; even uses of specially-constrained locations,
|
||||
/// such as fixed physical registers, are done by using vregs, because
|
||||
@@ -312,8 +324,8 @@ impl VReg {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for VReg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Debug for VReg {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"VReg(vreg = {}, class = {:?})",
|
||||
@@ -323,8 +335,8 @@ impl std::fmt::Debug for VReg {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for VReg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Display for VReg {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(f, "v{}", self.vreg())
|
||||
}
|
||||
}
|
||||
@@ -383,8 +395,8 @@ impl SpillSlot {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for SpillSlot {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Display for SpillSlot {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(f, "stack{}", self.index())
|
||||
}
|
||||
}
|
||||
@@ -414,8 +426,8 @@ pub enum OperandConstraint {
|
||||
Reuse(usize),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for OperandConstraint {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Display for OperandConstraint {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
match self {
|
||||
Self::Any => write!(f, "any"),
|
||||
Self::Reg => write!(f, "reg"),
|
||||
@@ -797,14 +809,14 @@ impl Operand {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Operand {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
std::fmt::Display::fmt(self, f)
|
||||
impl core::fmt::Debug for Operand {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
core::fmt::Display::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Operand {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Display for Operand {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
match (self.kind(), self.pos()) {
|
||||
(OperandKind::Def, OperandPos::Late) | (OperandKind::Use, OperandPos::Early) => {
|
||||
write!(f, "{:?}", self.kind())?;
|
||||
@@ -837,14 +849,14 @@ pub struct Allocation {
|
||||
bits: u32,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Allocation {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
std::fmt::Display::fmt(self, f)
|
||||
impl core::fmt::Debug for Allocation {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
core::fmt::Display::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Allocation {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Display for Allocation {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
match self.kind() {
|
||||
AllocationKind::None => write!(f, "none"),
|
||||
AllocationKind::Reg => write!(f, "{}", self.as_reg().unwrap()),
|
||||
@@ -1029,10 +1041,6 @@ pub trait Function {
|
||||
false
|
||||
}
|
||||
|
||||
/// Determine whether an instruction is a move; if so, return the
|
||||
/// Operands for (src, dst).
|
||||
fn is_move(&self, insn: Inst) -> Option<(Operand, Operand)>;
|
||||
|
||||
// --------------------------
|
||||
// Instruction register slots
|
||||
// --------------------------
|
||||
@@ -1178,8 +1186,8 @@ pub struct ProgPoint {
|
||||
bits: u32,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ProgPoint {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Debug for ProgPoint {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"progpoint{}{}",
|
||||
@@ -1326,19 +1334,43 @@ impl<'a> Iterator for OutputIter<'a> {
|
||||
pub struct MachineEnv {
|
||||
/// Preferred physical registers for each class. These are the
|
||||
/// registers that will be allocated first, if free.
|
||||
///
|
||||
/// If an explicit scratch register is provided in `scratch_by_class` then
|
||||
/// it must not appear in this list.
|
||||
pub preferred_regs_by_class: [Vec<PReg>; 2],
|
||||
|
||||
/// Non-preferred physical registers for each class. These are the
|
||||
/// registers that will be allocated if a preferred register is
|
||||
/// not available; using one of these is considered suboptimal,
|
||||
/// but still better than spilling.
|
||||
///
|
||||
/// If an explicit scratch register is provided in `scratch_by_class` then
|
||||
/// it must not appear in this list.
|
||||
pub non_preferred_regs_by_class: [Vec<PReg>; 2],
|
||||
|
||||
/// Optional dedicated scratch register per class. This is needed to perform
|
||||
/// moves between registers when cyclic move patterns occur. The
|
||||
/// register should not be placed in either the preferred or
|
||||
/// non-preferred list (i.e., it is not otherwise allocatable).
|
||||
///
|
||||
/// Note that the register allocator will freely use this register
|
||||
/// between instructions, but *within* the machine code generated
|
||||
/// by a single (regalloc-level) instruction, the client is free
|
||||
/// to use the scratch register. E.g., if one "instruction" causes
|
||||
/// the emission of two machine-code instructions, this lowering
|
||||
/// can use the scratch register between them.
|
||||
///
|
||||
/// If a scratch register is not provided then the register allocator will
|
||||
/// automatically allocate one as needed, spilling a value to the stack if
|
||||
/// necessary.
|
||||
pub scratch_by_class: [Option<PReg>; 2],
|
||||
|
||||
/// Some `PReg`s can be designated as locations on the stack rather than
|
||||
/// actual registers. These can be used to tell the register allocator about
|
||||
/// pre-defined stack slots used for function arguments and return values.
|
||||
///
|
||||
/// `PReg`s in this list cannot be used as an allocatable register.
|
||||
/// `PReg`s in this list cannot be used as an allocatable or scratch
|
||||
/// register.
|
||||
pub fixed_stack_slots: Vec<PReg>,
|
||||
}
|
||||
|
||||
@@ -1403,9 +1435,9 @@ impl Output {
|
||||
// binary_search_by returns the index of where it would have
|
||||
// been inserted in Err.
|
||||
if pos < ProgPoint::before(inst_range.first()) {
|
||||
std::cmp::Ordering::Less
|
||||
core::cmp::Ordering::Less
|
||||
} else {
|
||||
std::cmp::Ordering::Greater
|
||||
core::cmp::Ordering::Greater
|
||||
}
|
||||
})
|
||||
.unwrap_err();
|
||||
@@ -1444,12 +1476,13 @@ pub enum RegAllocError {
|
||||
TooManyLiveRegs,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RegAllocError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
impl core::fmt::Display for RegAllocError {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(f, "{:?}", self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
impl std::error::Error for RegAllocError {}
|
||||
|
||||
/// Run the allocator.
|
||||
@@ -1458,7 +1491,13 @@ pub fn run<F: Function>(
|
||||
env: &MachineEnv,
|
||||
options: &RegallocOptions,
|
||||
) -> Result<Output, RegAllocError> {
|
||||
ion::run(func, env, options.verbose_log, options.validate_ssa)
|
||||
ion::run(
|
||||
func,
|
||||
env,
|
||||
options.verbose_log,
|
||||
options.validate_ssa,
|
||||
options.fast_alloc,
|
||||
)
|
||||
}
|
||||
|
||||
/// Options for allocation.
|
||||
@@ -1469,4 +1508,7 @@ pub struct RegallocOptions {
|
||||
|
||||
/// Run the SSA validator before allocating registers.
|
||||
pub validate_ssa: bool,
|
||||
|
||||
/// Use a fast algorithm that may return suboptimal results.
|
||||
pub fast_alloc: bool,
|
||||
}
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
*/
|
||||
|
||||
use crate::{ion::data_structures::u64_key, Allocation, PReg};
|
||||
use core::fmt::Debug;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt::Debug;
|
||||
|
||||
/// A list of moves to be performed in sequence, with auxiliary data
|
||||
/// attached to each.
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
//! Fast postorder computation.
|
||||
|
||||
use crate::Block;
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
pub fn calculate<'a, SuccFn: Fn(Block) -> &'a [Block]>(
|
||||
@@ -16,8 +18,7 @@ pub fn calculate<'a, SuccFn: Fn(Block) -> &'a [Block]>(
|
||||
let mut ret = vec![];
|
||||
|
||||
// State: visited-block map, and explicit DFS stack.
|
||||
let mut visited = vec![];
|
||||
visited.resize(num_blocks, false);
|
||||
let mut visited = vec![false; num_blocks];
|
||||
|
||||
struct State<'a> {
|
||||
block: Block,
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
|
||||
//! SSA-related utilities.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use alloc::vec;
|
||||
use hashbrown::HashSet;
|
||||
|
||||
use crate::cfg::CFGInfo;
|
||||
use crate::{Block, Function, Inst, OperandKind, RegAllocError, VReg};
|
||||
|
||||
Reference in New Issue
Block a user