first impl

This commit is contained in:
T0b1
2023-04-14 18:18:15 +02:00
parent 993074a974
commit e2061d2e04
5 changed files with 998 additions and 52 deletions

746
bench_res/first_impl.txt Normal file
View File

@@ -0,0 +1,746 @@
execution :: cycles :: benchmarks/hex-simd/benchmark.wasm
Δ = 345792.24 ± 11833.61 (confidence = 99%)
tmp/wasmtime_main.so is 7.36x to 7.81x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[47340 52501.32 190872] tmp/wasmtime_main.so
[365580 398293.56 542808] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-base64/benchmark.wasm
Δ = 1970853027.76 ± 6979113.24 (confidence = 99%)
tmp/wasmtime_main.so is 5.72x to 5.76x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[409600548 415637278.00 500391972] tmp/wasmtime_main.so
[2366821945 2386490305.76 2513644956] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm
Δ = 46825190455.59 ± 127821876.94 (confidence = 99%)
tmp/wasmtime_main.so is 5.61x to 5.63x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[9956663856 10133140692.71 10544512428] tmp/wasmtime_main.so
[56352845593 56958331148.30 58657017995] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/meshoptimizer/benchmark.wasm
Δ = 20106010623.89 ± 139125939.44 (confidence = 99%)
tmp/wasmtime_main.so is 5.00x to 5.06x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[4815750421 4986455753.06 5219471557] tmp/wasmtime_main.so
[24698961323 25092466376.95 28372067424] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/blake3-scalar/benchmark.wasm
Δ = 1575653.08 ± 47716.10 (confidence = 99%)
tmp/wasmtime_main.so is 4.89x to 5.14x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[368532 392537.50 565414] tmp/wasmtime_main.so
[1832400 1968190.58 3389364] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm
Δ = 24043961.50 ± 125310.65 (confidence = 99%)
tmp/wasmtime_main.so is 4.86x to 4.91x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[5992344 6189280.03 8772444] tmp/wasmtime_main.so
[29597724 30233241.53 31265100] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/intgemm-simd/benchmark.wasm
Δ = 11790836237.58 ± 49258123.99 (confidence = 99%)
tmp/wasmtime_main.so is 4.77x to 4.80x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[3095937432 3117382542.95 3205026611] tmp/wasmtime_main.so
[14643634139 14908218780.53 15634756441] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/blake3-simd/benchmark.wasm
Δ = 1519308.88 ± 43278.47 (confidence = 99%)
tmp/wasmtime_main.so is 4.60x to 4.81x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[387108 410404.35 723458] tmp/wasmtime_main.so
[1825704 1929713.23 2913948] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/regex/benchmark.wasm
Δ = 881482054.62 ± 3682662.85 (confidence = 99%)
tmp/wasmtime_main.so is 4.56x to 4.59x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[243603396 246887000.62 255784177] tmp/wasmtime_main.so
[1114811388 1128369055.24 1169802000] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/bz2/benchmark.wasm
Δ = 357450293.22 ± 1549780.77 (confidence = 99%)
tmp/wasmtime_main.so is 4.37x to 4.40x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[103914719 105636434.33 114783229] tmp/wasmtime_main.so
[457542179 463086727.55 486926532] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm
Δ = 11882263.43 ± 71185.74 (confidence = 99%)
tmp/wasmtime_main.so is 4.29x to 4.33x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[3426984 3590238.35 4005036] tmp/wasmtime_main.so
[15081912 15472501.78 16709212] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-gimli/benchmark.wasm
Δ = 18851736.16 ± 108983.69 (confidence = 99%)
tmp/wasmtime_main.so is 4.27x to 4.31x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[5576796 5731722.69 6796368] tmp/wasmtime_main.so
[24021649 24583458.85 26077788] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm
Δ = 153466317.39 ± 987719.38 (confidence = 99%)
tmp/wasmtime_main.so is 4.09x to 4.13x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[47873412 49380867.56 51816349] tmp/wasmtime_main.so
[198883511 202847184.95 230991985] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-sieve/benchmark.wasm
Δ = 2917751368.96 ± 57306722.93 (confidence = 99%)
tmp/wasmtime_main.so is 3.89x to 4.01x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[909005867 990025561.72 1153883089] tmp/wasmtime_main.so
[3782853326 3907776930.68 5243343479] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-keccak/benchmark.wasm
Δ = 119732620.34 ± 7294642.74 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 3.70x to 4.05x faster than tmp/wasmtime_main.so!
[147621024 161371775.17 242314921] tmp/wasmtime_main.so
[34151940 41639154.83 84386880] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/blind-sig/benchmark.wasm
Δ = 761849961.52 ± 2634639.77 (confidence = 99%)
tmp/wasmtime_main.so is 3.42x to 3.44x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[307939679 313264599.69 338735987] tmp/wasmtime_main.so
[1066340736 1075114561.21 1133205407] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-ctype/benchmark.wasm
Δ = 2051594010.51 ± 10906506.10 (confidence = 99%)
tmp/wasmtime_main.so is 3.27x to 3.29x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[889577173 899301369.62 941784589] tmp/wasmtime_main.so
[2911972285 2950895380.13 3124423188] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm
Δ = 26578151841.89 ± 149545429.75 (confidence = 99%)
tmp/wasmtime_main.so is 3.22x to 3.25x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[11730048733 11882966637.26 14017886712] tmp/wasmtime_main.so
[37780663608 38461118479.15 40601000196] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/spidermonkey/benchmark.wasm
Δ = 2276822920.39 ± 18631445.19 (confidence = 99%)
tmp/wasmtime_main.so is 3.13x to 3.16x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[1043458487 1062503164.66 1125259668] tmp/wasmtime_main.so
[3268311912 3339326085.05 3827897531] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm
Δ = 6602600891.49 ± 27508477.21 (confidence = 99%)
tmp/wasmtime_main.so is 2.92x to 2.94x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[3407309028 3425050525.40 3470297579] tmp/wasmtime_main.so
[9912739643 10027651416.89 10301668992] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm
Δ = 288104513.37 ± 9735499.60 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 2.81x to 2.94x faster than tmp/wasmtime_main.so!
[418829472 441878125.69 536600161] tmp/wasmtime_main.so
[138407509 153773612.32 240393780] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-keccak/benchmark.wasm
Δ = 58212400.30 ± 1705214.89 (confidence = 99%)
tmp/wasmtime_main.so is 2.81x to 2.92x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[29494764 31245282.73 47847564] tmp/wasmtime_main.so
[83949804 89457683.03 119340505] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm
Δ = 3205249194.33 ± 18911258.58 (confidence = 99%)
tmp/wasmtime_main.so is 2.69x to 2.71x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[1851027229 1882842939.40 1942785576] tmp/wasmtime_main.so
[5022056881 5088092133.73 5357629800] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-matrix/benchmark.wasm
Δ = 1231745436.50 ± 5707664.58 (confidence = 99%)
tmp/wasmtime_main.so is 2.65x to 2.67x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[735814657 741951718.69 767631096] tmp/wasmtime_main.so
[1958603184 1973697155.19 2110201200] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm
Δ = 11897294.54 ± 122079.69 (confidence = 99%)
tmp/wasmtime_main.so is 2.41x to 2.44x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[8045424 8373467.87 10410839] tmp/wasmtime_main.so
[18899892 20270762.41 21215088] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-switch/benchmark.wasm
Δ = 204609415.39 ± 1598010.13 (confidence = 99%)
tmp/wasmtime_main.so is 2.40x to 2.42x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[142621019 145191091.72 152865612] tmp/wasmtime_main.so
[342664380 349800507.11 374884265] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-fib2/benchmark.wasm
Δ = 3236916155.06 ± 31613056.68 (confidence = 99%)
tmp/wasmtime_main.so is 1.89x to 1.91x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[3467702447 3591461293.98 3742357248] tmp/wasmtime_main.so
[6737314536 6828377449.04 7322136227] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/bz2/benchmark.wasm
Δ = 128770793.59 ± 7597401.75 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.83x to 1.94x faster than tmp/wasmtime_main.so!
[253517005 274203142.89 353141172] tmp/wasmtime_main.so
[128667349 145432349.30 217816236] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-switch/benchmark.wasm
Δ = 56438333.94 ± 7491692.69 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.58x to 1.76x faster than tmp/wasmtime_main.so!
[128765772 140553794.72 224904593] tmp/wasmtime_main.so
[69970176 84115460.78 148415723] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-sieve/benchmark.wasm
Δ = 23774967.71 ± 6707285.75 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.33x to 1.59x faster than tmp/wasmtime_main.so!
[63403956 75258605.97 137083860] tmp/wasmtime_main.so
[40798729 51483638.26 112740804] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm
Δ = 12569754.53 ± 7044529.04 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.20x to 1.71x faster than tmp/wasmtime_main.so!
[30574403 40371559.79 114436297] tmp/wasmtime_main.so
[20335572 27801805.26 96580117] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-fib2/benchmark.wasm
Δ = 23443643.25 ± 6736282.13 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.32x to 1.58x faster than tmp/wasmtime_main.so!
[65146465 75878581.65 138438253] tmp/wasmtime_main.so
[41823324 52434938.40 112445784] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm
Δ = 23131358.99 ± 7040047.00 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.31x to 1.58x faster than tmp/wasmtime_main.so!
[64246896 75196786.96 137354796] tmp/wasmtime_main.so
[41746068 52065427.97 115099595] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-random/benchmark.wasm
Δ = 22180731.35 ± 7399359.46 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.28x to 1.57x faster than tmp/wasmtime_main.so!
[64062720 74368751.23 143287308] tmp/wasmtime_main.so
[41346179 52188019.88 122821552] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-matrix/benchmark.wasm
Δ = 23690644.52 ± 6102793.29 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.52x faster than tmp/wasmtime_main.so!
[70124616 81369306.83 143019432] tmp/wasmtime_main.so
[47633327 57678662.31 117778860] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-ctype/benchmark.wasm
Δ = 22184906.81 ± 6133213.98 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.52x faster than tmp/wasmtime_main.so!
[66025727 76273529.68 133334568] tmp/wasmtime_main.so
[44490997 54088622.87 117528480] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-base64/benchmark.wasm
Δ = 24097448.13 ± 6012729.31 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.51x faster than tmp/wasmtime_main.so!
[73622664 83718801.21 143594028] tmp/wasmtime_main.so
[50266079 59621353.08 112276475] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm
Δ = 23856950.87 ± 6442900.19 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.29x to 1.50x faster than tmp/wasmtime_main.so!
[72478980 83870223.68 143493840] tmp/wasmtime_main.so
[48298068 60013272.81 120159900] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm
Δ = 25699694.19 ± 6260900.16 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.29x to 1.48x faster than tmp/wasmtime_main.so!
[74991707 92036959.79 150451669] tmp/wasmtime_main.so
[50091336 66337265.60 124640460] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/blake3-simd/benchmark.wasm
Δ = 11917241.79 ± 6120474.86 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.13x to 1.41x faster than tmp/wasmtime_main.so!
[45112464 56382136.04 124882526] tmp/wasmtime_main.so
[35716282 44464894.25 91276271] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/meshoptimizer/benchmark.wasm
Δ = 18268175.66 ± 12995516.57 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.06x to 1.35x faster than tmp/wasmtime_main.so!
[90282564 106583377.46 172790496] tmp/wasmtime_main.so
[68407524 88315201.80 509777639] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/blake3-simd/benchmark.wasm
Δ = 27081.34 ± 12431.92 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.11x to 1.29x faster than tmp/wasmtime_main.so!
[130644 165156.84 363528] tmp/wasmtime_main.so
[104508 138075.50 262080] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/regex/benchmark.wasm
Δ = 168480153.80 ± 16692141.15 (confidence = 99%)
tmp/wasmtime_main.so is 1.17x to 1.21x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[814605301 881137381.33 1113401592] tmp/wasmtime_main.so
[986134213 1049617535.13 1163637937] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/regex/benchmark.wasm
Δ = 75630.26 ± 29814.65 (confidence = 99%)
tmp/wasmtime_main.so is 1.08x to 1.19x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[442296 542271.59 863567] tmp/wasmtime_main.so
[465264 617901.85 877788] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/intgemm-simd/benchmark.wasm
Δ = 66461228.45 ± 8191971.57 (confidence = 99%)
tmp/wasmtime_main.so is 1.12x to 1.15x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[447294061 482654101.73 581954652] tmp/wasmtime_main.so
[515086921 549115330.18 605796372] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm
Δ = 17534.86 ± 16060.09 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.01x to 1.25x faster than tmp/wasmtime_main.so!
[107316 153205.19 385056] tmp/wasmtime_main.so
[91044 135670.33 278172] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm
Δ = 17051.40 ± 14734.77 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.02x to 1.23x faster than tmp/wasmtime_main.so!
[107388 153577.80 470376] tmp/wasmtime_main.so
[89028 136526.40 303156] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm
Δ = 943126119.95 ± 164284193.32 (confidence = 99%)
tmp/wasmtime_main.so is 1.08x to 1.12x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[9021892788 9599833086.61 11546087076] tmp/wasmtime_main.so
[9978139369 10542959206.56 11402534341] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-sieve/benchmark.wasm
Δ = 15852.97 ± 11475.80 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.03x to 1.16x faster than tmp/wasmtime_main.so!
[147888 182349.73 321804] tmp/wasmtime_main.so
[134604 166496.76 312156] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm
Δ = 17296.92 ± 15752.90 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.01x to 1.18x faster than tmp/wasmtime_main.so!
[164268 205161.84 472392] tmp/wasmtime_main.so
[144648 187864.92 385272] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/blake3-scalar/benchmark.wasm
Δ = 19482138.65 ± 6431347.52 (confidence = 99%)
tmp/wasmtime_main.so is 1.06x to 1.12x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[199113301 217289885.09 288788372] tmp/wasmtime_main.so
[220745124 236772023.74 306132408] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-ctype/benchmark.wasm
Δ = 14145.49 ± 10664.87 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.02x to 1.14x faster than tmp/wasmtime_main.so!
[148536 187047.01 297001] tmp/wasmtime_main.so
[140796 172901.52 304200] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-fib2/benchmark.wasm
Δ = 13499.29 ± 13201.25 (confidence = 99%)
wasmtime/target/release/libwasmtime_bench_api.so is 1.00x to 1.16x faster than tmp/wasmtime_main.so!
[152316 184395.97 362197] tmp/wasmtime_main.so
[131760 170896.68 335880] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/hex-simd/benchmark.wasm
Δ = 14701654.36 ± 7722355.67 (confidence = 99%)
tmp/wasmtime_main.so is 1.03x to 1.11x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[187211376 205180475.08 282106044] tmp/wasmtime_main.so
[198510264 219882129.44 284877252] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-memmove/benchmark.wasm
Δ = 2750310.45 ± 142406.89 (confidence = 99%)
tmp/wasmtime_main.so is 1.07x to 1.07x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[38182931 39016919.77 40971169] tmp/wasmtime_main.so
[41085541 41767230.22 42774553] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-random/benchmark.wasm
Δ = 36038424.04 ± 857900.63 (confidence = 99%)
tmp/wasmtime_main.so is 1.05x to 1.05x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[669296593 673652082.43 682587649] tmp/wasmtime_main.so
[705019320 709690506.47 716079853] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm
Δ = 16340886.19 ± 7483652.81 (confidence = 99%)
tmp/wasmtime_main.so is 1.02x to 1.06x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[365185836 401650579.46 470100098] tmp/wasmtime_main.so
[392953752 417991465.65 484354441] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/blind-sig/benchmark.wasm
Δ = 20118786.07 ± 13423183.44 (confidence = 99%)
tmp/wasmtime_main.so is 1.01x to 1.06x faster than wasmtime/target/release/libwasmtime_bench_api.so!
[515765845 573135907.05 772132716] tmp/wasmtime_main.so
[550619425 593254693.12 704204705] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm
No difference in performance.
[504 2611.08 183780] tmp/wasmtime_main.so
[540 751.68 1296] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-gimli/benchmark.wasm
No difference in performance.
[7092684 15453006.13 84934656] tmp/wasmtime_main.so
[5726555 10028783.61 45153864] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-switch/benchmark.wasm
No difference in performance.
[149832 238550.40 6512688] tmp/wasmtime_main.so
[134496 170645.40 450072] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm
No difference in performance.
[24658307 35091571.34 110428560] tmp/wasmtime_main.so
[19204344 28347037.36 97184879] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/bz2/benchmark.wasm
No difference in performance.
[188748 273112.54 5585866] tmp/wasmtime_main.so
[186048 224155.80 456408] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-memmove/benchmark.wasm
No difference in performance.
[24383809 34604974.24 110344320] tmp/wasmtime_main.so
[19233251 28705933.78 97765704] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm
No difference in performance.
[24109811 35190977.32 111999528] tmp/wasmtime_main.so
[19003032 29830501.31 108302112] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm
No difference in performance.
[25825104 36284578.84 98705267] tmp/wasmtime_main.so
[20983716 31298895.22 81193032] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm
No difference in performance.
[10303488 19091732.71 95030243] tmp/wasmtime_main.so
[9972324 17266338.21 86076144] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/noop/benchmark.wasm
No difference in performance.
[94788 124173.74 283140] tmp/wasmtime_main.so
[85608 113814.01 316476] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm
No difference in performance.
[126900 173581.19 297072] tmp/wasmtime_main.so
[105984 159178.68 334008] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-matrix/benchmark.wasm
No difference in performance.
[151956 181208.16 367992] tmp/wasmtime_main.so
[142164 167950.09 523081] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-memmove/benchmark.wasm
No difference in performance.
[107568 151283.52 417996] tmp/wasmtime_main.so
[96552 140387.04 551844] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-keccak/benchmark.wasm
No difference in performance.
[104436 161983.80 325260] tmp/wasmtime_main.so
[90216 150572.50 284940] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm
No difference in performance.
[133848 161197.20 268956] tmp/wasmtime_main.so
[122652 152241.84 404784] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm
No difference in performance.
[130500 156812.06 297000] tmp/wasmtime_main.so
[120384 148217.39 255168] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm
No difference in performance.
[145584 173084.40 341136] tmp/wasmtime_main.so
[135252 165701.52 326484] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/blake3-scalar/benchmark.wasm
No difference in performance.
[184392 212838.50 532908] tmp/wasmtime_main.so
[165528 203829.85 354708] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/hex-simd/benchmark.wasm
No difference in performance.
[209052 273107.88 423720] tmp/wasmtime_main.so
[187704 261877.34 427392] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-random/benchmark.wasm
No difference in performance.
[145620 177143.04 297648] tmp/wasmtime_main.so
[135648 169880.05 291132] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/blind-sig/benchmark.wasm
No difference in performance.
[192348 226756.79 469619] tmp/wasmtime_main.so
[182232 217715.76 380268] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-base64/benchmark.wasm
No difference in performance.
[149328 174356.64 295560] tmp/wasmtime_main.so
[138492 167680.81 302436] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/intgemm-simd/benchmark.wasm
No difference in performance.
[178056 207135.36 320040] tmp/wasmtime_main.so
[167760 200227.68 337680] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/noop/benchmark.wasm
No difference in performance.
[432 615.96 1008] tmp/wasmtime_main.so
[396 637.20 1116] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/meshoptimizer/benchmark.wasm
No difference in performance.
[179676 212660.64 351072] tmp/wasmtime_main.so
[167292 219323.16 435815] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm
No difference in performance.
[140976 172092.24 285480] tmp/wasmtime_main.so
[135180 167450.04 334512] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/spidermonkey/benchmark.wasm
No difference in performance.
[579672 632428.92 826056] tmp/wasmtime_main.so
[545976 645584.76 1196496] wasmtime/target/release/libwasmtime_bench_api.so
compilation :: cycles :: benchmarks/noop/benchmark.wasm
No difference in performance.
[17553708 26606839.96 100542240] tmp/wasmtime_main.so
[18061812 26108957.05 84559104] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm
No difference in performance.
[237672 275609.15 440675] tmp/wasmtime_main.so
[215172 279336.61 616680] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm
No difference in performance.
[130644 158666.76 299736] tmp/wasmtime_main.so
[121464 157300.20 329328] wasmtime/target/release/libwasmtime_bench_api.so
instantiation :: cycles :: benchmarks/shootout-gimli/benchmark.wasm
No difference in performance.
[84456 123928.57 425556] tmp/wasmtime_main.so
[86580 122936.41 249768] wasmtime/target/release/libwasmtime_bench_api.so
execution :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm
No difference in performance.
[468 739.08 1476] tmp/wasmtime_main.so
[468 736.56 2700] wasmtime/target/release/libwasmtime_bench_api.so

View File

@@ -11,6 +11,6 @@ fuzz_target!(|func: Func| {
let _ = env_logger::try_init(); let _ = env_logger::try_init();
log::trace!("func:\n{:?}", func); log::trace!("func:\n{:?}", func);
let env = regalloc2::fuzzing::func::machine_env(); let env = regalloc2::fuzzing::func::machine_env();
let _out = let _out = regalloc2::fuzzing::ion::run(&func, &env, false, false, true)
regalloc2::fuzzing::ion::run(&func, &env, false, false).expect("regalloc did not succeed"); .expect("regalloc did not succeed");
}); });

View File

@@ -40,8 +40,8 @@ fuzz_target!(|testcase: TestCase| {
let _ = env_logger::try_init(); let _ = env_logger::try_init();
log::trace!("func:\n{:?}", func); log::trace!("func:\n{:?}", func);
let env = regalloc2::fuzzing::func::machine_env(); let env = regalloc2::fuzzing::func::machine_env();
let out = let out = regalloc2::fuzzing::ion::run(&func, &env, true, false, true)
regalloc2::fuzzing::ion::run(&func, &env, true, false).expect("regalloc did not succeed"); .expect("regalloc did not succeed");
let mut checker = Checker::new(&func, &env); let mut checker = Checker::new(&func, &env);
checker.prepare(&out); checker.prepare(&out);

View File

@@ -37,6 +37,6 @@ impl Arbitrary<'_> for TestCase {
} }
fuzz_target!(|t: TestCase| { fuzz_target!(|t: TestCase| {
let cfginfo = CFGInfo::new(&t.f).expect("could not create CFG info"); let cfginfo = CFGInfo::new(&t.f, true).expect("could not create CFG info");
validate_ssa(&t.f, &cfginfo).expect("invalid SSA"); validate_ssa(&t.f, &cfginfo).expect("invalid SSA");
}); });

View File

@@ -1,8 +1,10 @@
use alloc::vec; use alloc::format;
use alloc::vec::Vec; use alloc::vec::Vec;
use alloc::{string::String, vec};
use smallvec::{smallvec, SmallVec}; use smallvec::{smallvec, SmallVec};
use std::{convert::TryFrom, println}; use std::{convert::TryFrom, println};
use crate::InstPosition;
use crate::{ use crate::{
cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint, cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint,
OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot, OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot,
@@ -26,17 +28,18 @@ struct PRegData {
#[derive(Default, Clone, Copy)] #[derive(Default, Clone, Copy)]
struct BlockData { struct BlockData {
pub allocated: bool, pub reg_allocated: bool,
pub params_allocated: bool,
} }
struct ReadOnlyData { struct ReadOnlyData {
pub preorder: Vec<Block>, pub postorder: Vec<Block>,
pub reg_order_int: Vec<PReg>, pub reg_order_int: Vec<PReg>,
pub reg_order_float: Vec<PReg>, pub reg_order_float: Vec<PReg>,
} }
impl ReadOnlyData { impl ReadOnlyData {
pub fn init<F: Function>(func: &F, mach_env: &MachineEnv) -> Self { pub fn init<F: Function>(func: &F, mach_env: &MachineEnv, cfg: &CFGInfo) -> Self {
let reg_order_int = { let reg_order_int = {
let class = RegClass::Int as usize; let class = RegClass::Int as usize;
let amount = mach_env.preferred_regs_by_class[class].len() let amount = mach_env.preferred_regs_by_class[class].len()
@@ -60,14 +63,14 @@ impl ReadOnlyData {
Self { Self {
reg_order_int, reg_order_int,
reg_order_float, reg_order_float,
preorder: Self::calc_preorder(func), postorder: cfg.postorder.clone(),
} }
} }
pub fn reg_order(&self, class: RegClass) -> &[PReg] { pub fn reg_order(&self, class: RegClass) -> &[PReg] {
match class { match class {
RegClass::Int => &self.reg_order_int, RegClass::Int => &self.reg_order_int,
RegClass::Float => &self.reg_order_int, RegClass::Float => &self.reg_order_float,
} }
} }
@@ -173,6 +176,22 @@ impl<'a, F: Function> FastAllocState<'a, F> {
let mut inst_alloc_offsets = Vec::with_capacity(func.num_insts()); let mut inst_alloc_offsets = Vec::with_capacity(func.num_insts());
inst_alloc_offsets.resize(func.num_insts(), 0); inst_alloc_offsets.resize(func.num_insts(), 0);
// we need to create the alloc array beforehand because it needs to be sorted by inst index
// which we cannot guarantee when iterating through the blocks in reverse post-order
let allocs = {
let block_count = func.num_blocks();
let mut cur_idx = 0;
for i in 0..block_count {
for inst in func.block_insns(Block::new(i)).iter() {
inst_alloc_offsets[inst.index()] = cur_idx as u32;
cur_idx += func.inst_operands(inst).len();
}
}
let mut allocs = Vec::with_capacity(cur_idx);
allocs.resize(cur_idx, Allocation::none());
allocs
};
Self { Self {
vregs, vregs,
pregs, pregs,
@@ -186,7 +205,7 @@ impl<'a, F: Function> FastAllocState<'a, F> {
stack_slot_count_float: u8::try_from(func.spillslot_size(RegClass::Float)) stack_slot_count_float: u8::try_from(func.spillslot_size(RegClass::Float))
.expect("that's a big float"), .expect("that's a big float"),
allocs: Vec::new(), allocs,
inst_alloc_offsets, inst_alloc_offsets,
edits: Vec::new(), edits: Vec::new(),
safepoint_slots: Vec::new(), safepoint_slots: Vec::new(),
@@ -296,8 +315,15 @@ impl<'a, F: Function> FastAllocState<'a, F> {
} }
pub fn assign_preg(&mut self, preg: PReg, vreg: VReg) { pub fn assign_preg(&mut self, preg: PReg, vreg: VReg) {
// TODO: somewhere assign_preg is called without making sure the vreg is clear (or inspite of it)
// need to make sure this is intended behavior
self.clear_preg(preg);
self.pregs[preg.index()].vreg = Some(vreg.vreg() as u32); self.pregs[preg.index()].vreg = Some(vreg.vreg() as u32);
self.vregs[vreg.vreg()].preg = Some(preg); self.vregs[vreg.vreg()].preg = Some(preg);
if self.vregs[vreg.vreg()].reftype {
self.reftype_vregs_in_pregs_count += 1;
}
} }
pub fn clear_preg(&mut self, preg: PReg) { pub fn clear_preg(&mut self, preg: PReg) {
@@ -356,18 +382,38 @@ pub fn run<F: Function>(
} }
let mut state = FastAllocState::init(func, mach_env, &cfg); let mut state = FastAllocState::init(func, mach_env, &cfg);
let const_state = ReadOnlyData::init(func, mach_env); let const_state = ReadOnlyData::init(func, mach_env, &cfg);
let len = const_state.preorder.len(); state.blocks[func.entry_block().index()].params_allocated = true;
let len = const_state.postorder.len();
for i in 0..len { for i in 0..len {
let block = const_state.preorder[i]; let block = const_state.postorder[len - 1 - i];
// when handling branches later, we already have an input mapping for the block params if state.blocks[block.index()].reg_allocated {
state.blocks[block.index()].allocated = true; trace!("Block {} already allocated. Skipping", i);
continue;
}
state.blocks[block.index()].reg_allocated = true;
trace!("Allocating block {}", i);
allocate_block_insts(&mut state, &const_state, block)?; allocate_block_insts(&mut state, &const_state, block)?;
handle_out_block_params(&mut state, &const_state, block)?; handle_out_block_params(&mut state, &const_state, block)?;
} }
// we do not iterate the blocks in their index order so the order of edits might not be sorted by progpoint
// however it should be nearly sorted
state.edits.sort_by_key(|entry| entry.0);
trace!("Edits:");
for edit in &state.edits {
match edit.1 {
Edit::Move { from, to } => {
trace!("At {:?} from {} to {}", edit.0, from, to);
}
}
}
Ok(Output { Ok(Output {
num_spillslots: state.cur_stack_slot_idx as usize, num_spillslots: state.cur_stack_slot_idx as usize,
edits: state.edits, edits: state.edits,
@@ -385,15 +431,29 @@ fn allocate_block_insts<'a, F: Function>(
block: Block, block: Block,
) -> Result<(), RegAllocError> { ) -> Result<(), RegAllocError> {
for inst in state.func.block_insns(block).iter() { for inst in state.func.block_insns(block).iter() {
let edit_start_idx = state.edits.len();
let clobbers = state.func.inst_clobbers(inst); let clobbers = state.func.inst_clobbers(inst);
let operands = state.func.inst_operands(inst); let operands = state.func.inst_operands(inst);
let req_refs_on_stack = state.func.requires_refs_on_stack(inst); let req_refs_on_stack = state.func.requires_refs_on_stack(inst);
let alloc_idx = state.inst_alloc_offsets[inst.index()] as usize;
let alloc_idx = state.allocs.len(); trace!(
state.inst_alloc_offsets[inst.index()] = alloc_idx as u32; "Allocating Inst {} (refs_on_stack: {}, is_ret: {}, is_branch: {}, alloc_idx: {})",
state inst.index(),
.allocs req_refs_on_stack,
.resize(alloc_idx + operands.len(), Allocation::none()); state.func.is_ret(inst),
state.func.is_branch(inst),
alloc_idx
);
let mut str = String::new();
for preg in clobbers {
if str.is_empty() {
str.push_str(&format!("{}", preg));
} else {
str.push_str(&format!(", {}", preg));
}
}
trace!("Clobbers: {}", str);
// keep track of which pregs where allocated so we can clear them later on // keep track of which pregs where allocated so we can clear them later on
// TODO: wouldnt need this if we look up the inst a vreg was allocated at // TODO: wouldnt need this if we look up the inst a vreg was allocated at
@@ -413,6 +473,7 @@ fn allocate_block_insts<'a, F: Function>(
for vreg in state.reftype_vregs { for vreg in state.reftype_vregs {
let data = &state.vregs[vreg.vreg()]; let data = &state.vregs[vreg.vreg()];
if let Some(slot) = data.slot_idx { if let Some(slot) = data.slot_idx {
trace!("Marking vreg {} as saved on stack at {}", vreg, slot);
state state
.safepoint_slots .safepoint_slots
.push((pos, Allocation::stack(SpillSlot::new(slot as usize)))); .push((pos, Allocation::stack(SpillSlot::new(slot as usize))));
@@ -421,9 +482,32 @@ fn allocate_block_insts<'a, F: Function>(
} }
// we allocate fixed defs/uses and stack allocations first // we allocate fixed defs/uses and stack allocations first
trace!("First alloc pass");
for (i, op) in operands.iter().enumerate() { for (i, op) in operands.iter().enumerate() {
let vreg = op.vreg(); let vreg = op.vreg();
trace!("Operand {}: {}", i, op);
if vreg == VReg::invalid() {
// it seems cranelift emits fixed reg uses with invalid vregs, handle them here
// TODO: treat them like normal vregs by just using last_vreg_index+1 for them?
match op.constraint() {
OperandConstraint::FixedReg(reg) => {
state.clear_preg(reg);
regs_allocated.push(reg);
state.allocs[alloc_idx + i] = Allocation::reg(reg);
trace!("Chose {} for operand {}", reg, i);
late_write_disallow_regs.add(reg);
}
_ => {
panic!(
"Invalid op constraint {:?} for invalid vreg",
op.constraint()
);
}
}
continue;
}
match op.constraint() { match op.constraint() {
OperandConstraint::FixedReg(reg) => { OperandConstraint::FixedReg(reg) => {
match op.kind() { match op.kind() {
@@ -450,10 +534,12 @@ fn allocate_block_insts<'a, F: Function>(
return Err(RegAllocError::TooManyLiveRegs); return Err(RegAllocError::TooManyLiveRegs);
} }
trace!("Operand {}'s allocation may not be used by a late def", i);
// late uses cannot share a register with late defs // late uses cannot share a register with late defs
late_write_disallow_regs.add(reg); late_write_disallow_regs.add(reg);
} }
regs_allocated.push(reg); regs_allocated.push(reg);
trace!("Chose {} for operand {}", reg, i);
} }
OperandKind::Def => { OperandKind::Def => {
if op.pos() == OperandPos::Late { if op.pos() == OperandPos::Late {
@@ -469,6 +555,7 @@ fn allocate_block_insts<'a, F: Function>(
panic!("early def shares reg or is clobbered"); panic!("early def shares reg or is clobbered");
return Err(RegAllocError::TooManyLiveRegs); return Err(RegAllocError::TooManyLiveRegs);
} }
trace!("Operand {}'s allocation may not be used by a late def", i);
// early defs cannot share a register with late defs // early defs cannot share a register with late defs
late_write_disallow_regs.add(reg); late_write_disallow_regs.add(reg);
} }
@@ -527,11 +614,11 @@ fn allocate_block_insts<'a, F: Function>(
state.move_to_stack(tmp_reg, vreg, ProgPoint::after(inst)); state.move_to_stack(tmp_reg, vreg, ProgPoint::after(inst));
regs_allocated.push(tmp_reg); regs_allocated.push(tmp_reg);
} else { } else {
println!("2");
state.alloc_stack_slot(vreg); state.alloc_stack_slot(vreg);
state.move_to_stack(reg, vreg, ProgPoint::after(inst)); state.move_to_stack(reg, vreg, ProgPoint::after(inst));
regs_allocated.push(reg); regs_allocated.push(reg);
} }
trace!("Chose {} for operand {}", reg, i);
} }
} }
} }
@@ -542,14 +629,16 @@ fn allocate_block_insts<'a, F: Function>(
if let Some(slot) = &state.vregs[vreg.vreg()].slot_idx { if let Some(slot) = &state.vregs[vreg.vreg()].slot_idx {
state.allocs[alloc_idx + i] = state.allocs[alloc_idx + i] =
Allocation::stack(SpillSlot::new(*slot as usize)); Allocation::stack(SpillSlot::new(*slot as usize));
trace!("Chose slot {} for operand {}", slot, i);
} else { } else {
return Err(RegAllocError::SSA(vreg, inst)); return Err(RegAllocError::SSA(vreg, inst));
} }
} }
OperandKind::Def => { OperandKind::Def => {
state.allocs[alloc_idx + i] = Allocation::stack(SpillSlot::new( let slot = state.alloc_stack_slot(vreg);
state.alloc_stack_slot(vreg) as usize, state.allocs[alloc_idx + i] =
)); Allocation::stack(SpillSlot::new(slot as usize));
trace!("Chose slot {} for operand {}", slot, i);
} }
} }
} }
@@ -558,12 +647,18 @@ fn allocate_block_insts<'a, F: Function>(
} }
// alloc non-fixed uses and early defs in registers // alloc non-fixed uses and early defs in registers
trace!("Second alloc pass");
for (i, op) in operands.iter().enumerate() { for (i, op) in operands.iter().enumerate() {
if op.kind() == OperandKind::Def && op.pos() == OperandPos::Late { if op.kind() == OperandKind::Def && op.pos() == OperandPos::Late {
continue; continue;
} }
trace!("Operand {}: {}", i, op);
let vreg = op.vreg(); let vreg = op.vreg();
if vreg == VReg::invalid() {
continue;
}
match op.constraint() { match op.constraint() {
OperandConstraint::Reg => { OperandConstraint::Reg => {
@@ -592,9 +687,10 @@ fn allocate_block_insts<'a, F: Function>(
// early def // early def
state.assign_preg(reg, vreg); state.assign_preg(reg, vreg);
state.alloc_stack_slot(vreg); state.alloc_stack_slot(vreg);
println!("3");
state.move_to_stack(reg, vreg, ProgPoint::after(inst)); state.move_to_stack(reg, vreg, ProgPoint::after(inst));
} }
trace!("Chose {} for operand {}", reg, i);
allocated = true; allocated = true;
break; break;
} }
@@ -603,6 +699,7 @@ fn allocate_block_insts<'a, F: Function>(
continue; continue;
} }
trace!("Ran out of registers for operand {}", i);
// No register available // No register available
// TODO: try to evict vreg that does not need to be in a preg // TODO: try to evict vreg that does not need to be in a preg
panic!("Out of registers: {:?}", regs_allocated); panic!("Out of registers: {:?}", regs_allocated);
@@ -616,12 +713,18 @@ fn allocate_block_insts<'a, F: Function>(
} }
// alloc non-fixed late defs and reuse // alloc non-fixed late defs and reuse
trace!("Third alloc pass");
for (i, op) in operands.iter().enumerate() { for (i, op) in operands.iter().enumerate() {
if op.kind() != OperandKind::Def || op.pos() != OperandPos::Late { if op.kind() != OperandKind::Def || op.pos() != OperandPos::Late {
continue; continue;
} }
trace!("Operand {}: {}", i, op);
let vreg = op.vreg(); let vreg = op.vreg();
if vreg == VReg::invalid() {
continue;
}
match op.constraint() { match op.constraint() {
OperandConstraint::Reg => { OperandConstraint::Reg => {
// find first non-allocated register // find first non-allocated register
@@ -639,8 +742,8 @@ fn allocate_block_insts<'a, F: Function>(
state.clear_preg(reg); state.clear_preg(reg);
state.assign_preg(reg, vreg); state.assign_preg(reg, vreg);
state.alloc_stack_slot(vreg); state.alloc_stack_slot(vreg);
println!("4");
state.move_to_stack(reg, vreg, ProgPoint::after(inst)); state.move_to_stack(reg, vreg, ProgPoint::after(inst));
trace!("Chose {} for operand {}", reg, i);
allocated = true; allocated = true;
break; break;
} }
@@ -663,7 +766,6 @@ fn allocate_block_insts<'a, F: Function>(
state.clear_preg(preg); state.clear_preg(preg);
state.assign_preg(preg, vreg); state.assign_preg(preg, vreg);
state.alloc_stack_slot(vreg); state.alloc_stack_slot(vreg);
println!("5");
state.move_to_stack(preg, vreg, ProgPoint::after(inst)); state.move_to_stack(preg, vreg, ProgPoint::after(inst));
} }
_ => { _ => {
@@ -674,8 +776,28 @@ fn allocate_block_insts<'a, F: Function>(
// clear out all allocated regs // clear out all allocated regs
for reg in regs_allocated { for reg in regs_allocated {
trace!("Clearing {}", reg);
state.clear_preg(reg); state.clear_preg(reg);
} }
// fixup edit order
let mut first_post_pos = None;
for i in edit_start_idx..state.edits.len() {
debug_assert!(state.edits[i].0.inst() == inst);
match first_post_pos {
None => {
if state.edits[i].0.pos() == InstPosition::After {
first_post_pos = Some(i);
}
}
Some(pos) => {
if state.edits[i].0.pos() == InstPosition::Before {
state.edits.swap(pos, i);
first_post_pos = Some(pos + 1);
}
}
}
}
} }
Ok(()) Ok(())
@@ -686,13 +808,52 @@ fn handle_out_block_params<'a, F: Function>(
const_state: &ReadOnlyData, const_state: &ReadOnlyData,
block: Block, block: Block,
) -> Result<(), RegAllocError> { ) -> Result<(), RegAllocError> {
trace!("Allocating outgoing blockparams for {}", block.index());
let last_inst = state.func.block_insns(block).last(); let last_inst = state.func.block_insns(block).last();
if !state.func.is_branch(last_inst) { if !state.func.is_branch(last_inst) {
trace!("Last inst {} is not a branch", last_inst.index());
return Ok(()); return Ok(());
} }
let mut pregs_used_by_br = PRegSet::empty();
{
let alloc_start = state.inst_alloc_offsets[last_inst.index()] as usize;
let alloc_end = if last_inst.index() + 1 == state.inst_alloc_offsets.len() {
state.inst_alloc_offsets.len()
} else {
state.inst_alloc_offsets[last_inst.index() + 1] as usize
};
for i in alloc_start..alloc_end {
if let Some(reg) = state.allocs[i].clone().as_reg() {
pregs_used_by_br.add(reg);
}
}
}
// wouldn't need this if the edits for this were made before the moves for the branch inst but that has its own share of problems i think
let tmp_reg_int = 'block: {
for reg in const_state.reg_order(RegClass::Int) {
if !pregs_used_by_br.contains(*reg) {
break 'block *reg;
}
}
panic!("No usable tmp_reg for block param handling");
};
let tmp_reg_float = 'block: {
for reg in const_state.reg_order(RegClass::Float) {
if !pregs_used_by_br.contains(*reg) {
break 'block *reg;
}
}
panic!("No usable tmp_reg for block param handling");
};
let succs = state.func.block_succs(block); let succs = state.func.block_succs(block);
if succs.len() == 1 && state.blocks[succs[0].index()].allocated { if succs.len() == 1 && state.blocks[succs[0].index()].params_allocated {
trace!("Only one allocated successor, moving allocations");
let succ = succs[0]; let succ = succs[0];
// move values to the already allocated places // move values to the already allocated places
let in_params = state.func.block_params(succ); let in_params = state.func.block_params(succ);
@@ -706,7 +867,11 @@ fn handle_out_block_params<'a, F: Function>(
debug_assert!(state.vregs[in_vreg.vreg()].slot_idx.is_some()); debug_assert!(state.vregs[in_vreg.vreg()].slot_idx.is_some());
debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some()); debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some());
let tmp_reg = const_state.reg_order(out_vreg.class())[0]; let tmp_reg = if out_vreg.class() == RegClass::Int {
tmp_reg_int
} else {
tmp_reg_float
};
let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap();
let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap(); let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap();
@@ -726,12 +891,20 @@ fn handle_out_block_params<'a, F: Function>(
)); ));
} }
} else { } else {
trace!("Successors not allocated. Creating allocation");
let mut allocs = SmallVec::<[(VReg, u32); 4]>::new();
// set incoming block params of successor to the current stack slot // set incoming block params of successor to the current stack slot
for (i, &succ) in state.func.block_succs(block).iter().enumerate() { for (i, &succ) in state.func.block_succs(block).iter().enumerate() {
if state.blocks[succ.index()].allocated { trace!("Creating block {}", succ.index());
if state.blocks[succ.index()].params_allocated {
return Err(RegAllocError::CritEdge(block, succ)); return Err(RegAllocError::CritEdge(block, succ));
} }
// we allocate the params here
// TODO: can there be a problem if the same successor occurs multiple times?
state.blocks[succ.index()].params_allocated = true;
let in_params = state.func.block_params(succ); let in_params = state.func.block_params(succ);
let out_params = state.func.branch_blockparams(block, last_inst, i); let out_params = state.func.branch_blockparams(block, last_inst, i);
debug_assert_eq!(in_params.len(), out_params.len()); debug_assert_eq!(in_params.len(), out_params.len());
@@ -743,29 +916,56 @@ fn handle_out_block_params<'a, F: Function>(
debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some()); debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some());
let out_slot_idx = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); let out_slot_idx = state.vregs[out_vreg.vreg()].slot_idx.unwrap();
// TODO: if out_vreg dies at this edge, we could reuse its stack slot
// TODO: we should also be able to reuse the slot if the successor only has one predecessor (us); check with AE
let mut no_alias = false;
if !vregs_passed.contains(&out_vreg) { if !vregs_passed.contains(&out_vreg) {
state.vregs[in_vreg.vreg()].slot_idx = Some(out_slot_idx); let mut alloced = false;
for alloc in &allocs {
if alloc.0 != out_vreg {
continue;
}
// we can use the already moved into stack slot
state.vregs[in_vreg.vreg()].slot_idx = Some(alloc.1);
vregs_passed.push(out_vreg);
alloced = true;
break;
}
if alloced {
continue;
}
vregs_passed.push(out_vreg); vregs_passed.push(out_vreg);
no_alias = true;
}
// need to duplicate to avoid aliasing or create a new stack slot
// TODO: this creates multiple duplications for multiple blocks, can be avoided
let tmp_reg = if out_vreg.class() == RegClass::Int {
tmp_reg_int
} else { } else {
// need to duplicate to avoid aliasing tmp_reg_float
// TODO: this creates multiple duplications for multiple blocks, can be avoided };
let tmp_reg = const_state.reg_order(out_vreg.class())[0]; let slot = state.create_stack_slot(out_vreg.class());
let slot = state.create_stack_slot(out_vreg.class()); state.edits.push((
state.edits.push(( ProgPoint::before(last_inst),
ProgPoint::before(last_inst), Edit::Move {
Edit::Move { from: Allocation::stack(SpillSlot::new(out_slot_idx as usize)),
from: Allocation::stack(SpillSlot::new(out_slot_idx as usize)), to: Allocation::reg(tmp_reg),
to: Allocation::reg(tmp_reg), },
}, ));
)); state.edits.push((
state.edits.push(( ProgPoint::before(last_inst),
ProgPoint::before(last_inst), Edit::Move {
Edit::Move { from: Allocation::reg(tmp_reg),
from: Allocation::reg(tmp_reg), to: Allocation::stack(SpillSlot::new(slot as usize)),
to: Allocation::stack(SpillSlot::new(slot as usize)), },
}, ));
)); state.vregs[in_vreg.vreg()].slot_idx = Some(slot);
state.vregs[in_vreg.vreg()].slot_idx = Some(slot);
if no_alias {
allocs.push((out_vreg, slot));
} }
} }
} }