diff --git a/bench_res/first_impl.txt b/bench_res/first_impl.txt new file mode 100644 index 0000000..c8cabbc --- /dev/null +++ b/bench_res/first_impl.txt @@ -0,0 +1,746 @@ + +execution :: cycles :: benchmarks/hex-simd/benchmark.wasm + + Δ = 345792.24 ± 11833.61 (confidence = 99%) + + tmp/wasmtime_main.so is 7.36x to 7.81x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [47340 52501.32 190872] tmp/wasmtime_main.so + [365580 398293.56 542808] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-base64/benchmark.wasm + + Δ = 1970853027.76 ± 6979113.24 (confidence = 99%) + + tmp/wasmtime_main.so is 5.72x to 5.76x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [409600548 415637278.00 500391972] tmp/wasmtime_main.so + [2366821945 2386490305.76 2513644956] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm + + Δ = 46825190455.59 ± 127821876.94 (confidence = 99%) + + tmp/wasmtime_main.so is 5.61x to 5.63x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [9956663856 10133140692.71 10544512428] tmp/wasmtime_main.so + [56352845593 56958331148.30 58657017995] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/meshoptimizer/benchmark.wasm + + Δ = 20106010623.89 ± 139125939.44 (confidence = 99%) + + tmp/wasmtime_main.so is 5.00x to 5.06x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [4815750421 4986455753.06 5219471557] tmp/wasmtime_main.so + [24698961323 25092466376.95 28372067424] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/blake3-scalar/benchmark.wasm + + Δ = 1575653.08 ± 47716.10 (confidence = 99%) + + tmp/wasmtime_main.so is 4.89x to 5.14x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [368532 392537.50 565414] tmp/wasmtime_main.so + [1832400 1968190.58 3389364] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm + + Δ = 24043961.50 ± 125310.65 (confidence = 99%) + + tmp/wasmtime_main.so is 4.86x to 4.91x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [5992344 6189280.03 8772444] tmp/wasmtime_main.so + [29597724 30233241.53 31265100] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/intgemm-simd/benchmark.wasm + + Δ = 11790836237.58 ± 49258123.99 (confidence = 99%) + + tmp/wasmtime_main.so is 4.77x to 4.80x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [3095937432 3117382542.95 3205026611] tmp/wasmtime_main.so + [14643634139 14908218780.53 15634756441] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/blake3-simd/benchmark.wasm + + Δ = 1519308.88 ± 43278.47 (confidence = 99%) + + tmp/wasmtime_main.so is 4.60x to 4.81x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [387108 410404.35 723458] tmp/wasmtime_main.so + [1825704 1929713.23 2913948] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/regex/benchmark.wasm + + Δ = 881482054.62 ± 3682662.85 (confidence = 99%) + + tmp/wasmtime_main.so is 4.56x to 4.59x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [243603396 246887000.62 255784177] tmp/wasmtime_main.so + [1114811388 1128369055.24 1169802000] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/bz2/benchmark.wasm + + Δ = 357450293.22 ± 1549780.77 (confidence = 99%) + + tmp/wasmtime_main.so is 4.37x to 4.40x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [103914719 105636434.33 114783229] tmp/wasmtime_main.so + [457542179 463086727.55 486926532] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm + + Δ = 11882263.43 ± 71185.74 (confidence = 99%) + + tmp/wasmtime_main.so is 4.29x to 4.33x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [3426984 3590238.35 4005036] tmp/wasmtime_main.so + [15081912 15472501.78 16709212] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-gimli/benchmark.wasm + + Δ = 18851736.16 ± 108983.69 (confidence = 99%) + + tmp/wasmtime_main.so is 4.27x to 4.31x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [5576796 5731722.69 6796368] tmp/wasmtime_main.so + [24021649 24583458.85 26077788] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm + + Δ = 153466317.39 ± 987719.38 (confidence = 99%) + + tmp/wasmtime_main.so is 4.09x to 4.13x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [47873412 49380867.56 51816349] tmp/wasmtime_main.so + [198883511 202847184.95 230991985] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-sieve/benchmark.wasm + + Δ = 2917751368.96 ± 57306722.93 (confidence = 99%) + + tmp/wasmtime_main.so is 3.89x to 4.01x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [909005867 990025561.72 1153883089] tmp/wasmtime_main.so + [3782853326 3907776930.68 5243343479] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-keccak/benchmark.wasm + + Δ = 119732620.34 ± 7294642.74 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 3.70x to 4.05x faster than tmp/wasmtime_main.so! + + [147621024 161371775.17 242314921] tmp/wasmtime_main.so + [34151940 41639154.83 84386880] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/blind-sig/benchmark.wasm + + Δ = 761849961.52 ± 2634639.77 (confidence = 99%) + + tmp/wasmtime_main.so is 3.42x to 3.44x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [307939679 313264599.69 338735987] tmp/wasmtime_main.so + [1066340736 1075114561.21 1133205407] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-ctype/benchmark.wasm + + Δ = 2051594010.51 ± 10906506.10 (confidence = 99%) + + tmp/wasmtime_main.so is 3.27x to 3.29x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [889577173 899301369.62 941784589] tmp/wasmtime_main.so + [2911972285 2950895380.13 3124423188] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm + + Δ = 26578151841.89 ± 149545429.75 (confidence = 99%) + + tmp/wasmtime_main.so is 3.22x to 3.25x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [11730048733 11882966637.26 14017886712] tmp/wasmtime_main.so + [37780663608 38461118479.15 40601000196] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/spidermonkey/benchmark.wasm + + Δ = 2276822920.39 ± 18631445.19 (confidence = 99%) + + tmp/wasmtime_main.so is 3.13x to 3.16x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [1043458487 1062503164.66 1125259668] tmp/wasmtime_main.so + [3268311912 3339326085.05 3827897531] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm + + Δ = 6602600891.49 ± 27508477.21 (confidence = 99%) + + tmp/wasmtime_main.so is 2.92x to 2.94x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [3407309028 3425050525.40 3470297579] tmp/wasmtime_main.so + [9912739643 10027651416.89 10301668992] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm + + Δ = 288104513.37 ± 9735499.60 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 2.81x to 2.94x faster than tmp/wasmtime_main.so! + + [418829472 441878125.69 536600161] tmp/wasmtime_main.so + [138407509 153773612.32 240393780] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-keccak/benchmark.wasm + + Δ = 58212400.30 ± 1705214.89 (confidence = 99%) + + tmp/wasmtime_main.so is 2.81x to 2.92x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [29494764 31245282.73 47847564] tmp/wasmtime_main.so + [83949804 89457683.03 119340505] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm + + Δ = 3205249194.33 ± 18911258.58 (confidence = 99%) + + tmp/wasmtime_main.so is 2.69x to 2.71x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [1851027229 1882842939.40 1942785576] tmp/wasmtime_main.so + [5022056881 5088092133.73 5357629800] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-matrix/benchmark.wasm + + Δ = 1231745436.50 ± 5707664.58 (confidence = 99%) + + tmp/wasmtime_main.so is 2.65x to 2.67x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [735814657 741951718.69 767631096] tmp/wasmtime_main.so + [1958603184 1973697155.19 2110201200] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm + + Δ = 11897294.54 ± 122079.69 (confidence = 99%) + + tmp/wasmtime_main.so is 2.41x to 2.44x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [8045424 8373467.87 10410839] tmp/wasmtime_main.so + [18899892 20270762.41 21215088] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-switch/benchmark.wasm + + Δ = 204609415.39 ± 1598010.13 (confidence = 99%) + + tmp/wasmtime_main.so is 2.40x to 2.42x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [142621019 145191091.72 152865612] tmp/wasmtime_main.so + [342664380 349800507.11 374884265] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-fib2/benchmark.wasm + + Δ = 3236916155.06 ± 31613056.68 (confidence = 99%) + + tmp/wasmtime_main.so is 1.89x to 1.91x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [3467702447 3591461293.98 3742357248] tmp/wasmtime_main.so + [6737314536 6828377449.04 7322136227] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/bz2/benchmark.wasm + + Δ = 128770793.59 ± 7597401.75 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.83x to 1.94x faster than tmp/wasmtime_main.so! + + [253517005 274203142.89 353141172] tmp/wasmtime_main.so + [128667349 145432349.30 217816236] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-switch/benchmark.wasm + + Δ = 56438333.94 ± 7491692.69 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.58x to 1.76x faster than tmp/wasmtime_main.so! + + [128765772 140553794.72 224904593] tmp/wasmtime_main.so + [69970176 84115460.78 148415723] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-sieve/benchmark.wasm + + Δ = 23774967.71 ± 6707285.75 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.33x to 1.59x faster than tmp/wasmtime_main.so! + + [63403956 75258605.97 137083860] tmp/wasmtime_main.so + [40798729 51483638.26 112740804] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm + + Δ = 12569754.53 ± 7044529.04 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.20x to 1.71x faster than tmp/wasmtime_main.so! + + [30574403 40371559.79 114436297] tmp/wasmtime_main.so + [20335572 27801805.26 96580117] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-fib2/benchmark.wasm + + Δ = 23443643.25 ± 6736282.13 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.32x to 1.58x faster than tmp/wasmtime_main.so! + + [65146465 75878581.65 138438253] tmp/wasmtime_main.so + [41823324 52434938.40 112445784] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm + + Δ = 23131358.99 ± 7040047.00 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.31x to 1.58x faster than tmp/wasmtime_main.so! + + [64246896 75196786.96 137354796] tmp/wasmtime_main.so + [41746068 52065427.97 115099595] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-random/benchmark.wasm + + Δ = 22180731.35 ± 7399359.46 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.28x to 1.57x faster than tmp/wasmtime_main.so! + + [64062720 74368751.23 143287308] tmp/wasmtime_main.so + [41346179 52188019.88 122821552] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-matrix/benchmark.wasm + + Δ = 23690644.52 ± 6102793.29 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.52x faster than tmp/wasmtime_main.so! + + [70124616 81369306.83 143019432] tmp/wasmtime_main.so + [47633327 57678662.31 117778860] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-ctype/benchmark.wasm + + Δ = 22184906.81 ± 6133213.98 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.52x faster than tmp/wasmtime_main.so! + + [66025727 76273529.68 133334568] tmp/wasmtime_main.so + [44490997 54088622.87 117528480] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-base64/benchmark.wasm + + Δ = 24097448.13 ± 6012729.31 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.30x to 1.51x faster than tmp/wasmtime_main.so! + + [73622664 83718801.21 143594028] tmp/wasmtime_main.so + [50266079 59621353.08 112276475] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm + + Δ = 23856950.87 ± 6442900.19 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.29x to 1.50x faster than tmp/wasmtime_main.so! + + [72478980 83870223.68 143493840] tmp/wasmtime_main.so + [48298068 60013272.81 120159900] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm + + Δ = 25699694.19 ± 6260900.16 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.29x to 1.48x faster than tmp/wasmtime_main.so! + + [74991707 92036959.79 150451669] tmp/wasmtime_main.so + [50091336 66337265.60 124640460] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/blake3-simd/benchmark.wasm + + Δ = 11917241.79 ± 6120474.86 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.13x to 1.41x faster than tmp/wasmtime_main.so! + + [45112464 56382136.04 124882526] tmp/wasmtime_main.so + [35716282 44464894.25 91276271] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/meshoptimizer/benchmark.wasm + + Δ = 18268175.66 ± 12995516.57 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.06x to 1.35x faster than tmp/wasmtime_main.so! + + [90282564 106583377.46 172790496] tmp/wasmtime_main.so + [68407524 88315201.80 509777639] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/blake3-simd/benchmark.wasm + + Δ = 27081.34 ± 12431.92 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.11x to 1.29x faster than tmp/wasmtime_main.so! + + [130644 165156.84 363528] tmp/wasmtime_main.so + [104508 138075.50 262080] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/regex/benchmark.wasm + + Δ = 168480153.80 ± 16692141.15 (confidence = 99%) + + tmp/wasmtime_main.so is 1.17x to 1.21x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [814605301 881137381.33 1113401592] tmp/wasmtime_main.so + [986134213 1049617535.13 1163637937] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/regex/benchmark.wasm + + Δ = 75630.26 ± 29814.65 (confidence = 99%) + + tmp/wasmtime_main.so is 1.08x to 1.19x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [442296 542271.59 863567] tmp/wasmtime_main.so + [465264 617901.85 877788] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/intgemm-simd/benchmark.wasm + + Δ = 66461228.45 ± 8191971.57 (confidence = 99%) + + tmp/wasmtime_main.so is 1.12x to 1.15x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [447294061 482654101.73 581954652] tmp/wasmtime_main.so + [515086921 549115330.18 605796372] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm + + Δ = 17534.86 ± 16060.09 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.01x to 1.25x faster than tmp/wasmtime_main.so! + + [107316 153205.19 385056] tmp/wasmtime_main.so + [91044 135670.33 278172] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm + + Δ = 17051.40 ± 14734.77 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.02x to 1.23x faster than tmp/wasmtime_main.so! + + [107388 153577.80 470376] tmp/wasmtime_main.so + [89028 136526.40 303156] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm + + Δ = 943126119.95 ± 164284193.32 (confidence = 99%) + + tmp/wasmtime_main.so is 1.08x to 1.12x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [9021892788 9599833086.61 11546087076] tmp/wasmtime_main.so + [9978139369 10542959206.56 11402534341] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-sieve/benchmark.wasm + + Δ = 15852.97 ± 11475.80 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.03x to 1.16x faster than tmp/wasmtime_main.so! + + [147888 182349.73 321804] tmp/wasmtime_main.so + [134604 166496.76 312156] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm + + Δ = 17296.92 ± 15752.90 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.01x to 1.18x faster than tmp/wasmtime_main.so! + + [164268 205161.84 472392] tmp/wasmtime_main.so + [144648 187864.92 385272] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/blake3-scalar/benchmark.wasm + + Δ = 19482138.65 ± 6431347.52 (confidence = 99%) + + tmp/wasmtime_main.so is 1.06x to 1.12x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [199113301 217289885.09 288788372] tmp/wasmtime_main.so + [220745124 236772023.74 306132408] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-ctype/benchmark.wasm + + Δ = 14145.49 ± 10664.87 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.02x to 1.14x faster than tmp/wasmtime_main.so! + + [148536 187047.01 297001] tmp/wasmtime_main.so + [140796 172901.52 304200] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-fib2/benchmark.wasm + + Δ = 13499.29 ± 13201.25 (confidence = 99%) + + wasmtime/target/release/libwasmtime_bench_api.so is 1.00x to 1.16x faster than tmp/wasmtime_main.so! + + [152316 184395.97 362197] tmp/wasmtime_main.so + [131760 170896.68 335880] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/hex-simd/benchmark.wasm + + Δ = 14701654.36 ± 7722355.67 (confidence = 99%) + + tmp/wasmtime_main.so is 1.03x to 1.11x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [187211376 205180475.08 282106044] tmp/wasmtime_main.so + [198510264 219882129.44 284877252] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-memmove/benchmark.wasm + + Δ = 2750310.45 ± 142406.89 (confidence = 99%) + + tmp/wasmtime_main.so is 1.07x to 1.07x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [38182931 39016919.77 40971169] tmp/wasmtime_main.so + [41085541 41767230.22 42774553] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-random/benchmark.wasm + + Δ = 36038424.04 ± 857900.63 (confidence = 99%) + + tmp/wasmtime_main.so is 1.05x to 1.05x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [669296593 673652082.43 682587649] tmp/wasmtime_main.so + [705019320 709690506.47 716079853] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm + + Δ = 16340886.19 ± 7483652.81 (confidence = 99%) + + tmp/wasmtime_main.so is 1.02x to 1.06x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [365185836 401650579.46 470100098] tmp/wasmtime_main.so + [392953752 417991465.65 484354441] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/blind-sig/benchmark.wasm + + Δ = 20118786.07 ± 13423183.44 (confidence = 99%) + + tmp/wasmtime_main.so is 1.01x to 1.06x faster than wasmtime/target/release/libwasmtime_bench_api.so! + + [515765845 573135907.05 772132716] tmp/wasmtime_main.so + [550619425 593254693.12 704204705] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-ackermann/benchmark.wasm + + No difference in performance. + + [504 2611.08 183780] tmp/wasmtime_main.so + [540 751.68 1296] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-gimli/benchmark.wasm + + No difference in performance. + + [7092684 15453006.13 84934656] tmp/wasmtime_main.so + [5726555 10028783.61 45153864] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-switch/benchmark.wasm + + No difference in performance. + + [149832 238550.40 6512688] tmp/wasmtime_main.so + [134496 170645.40 450072] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-xchacha20/benchmark.wasm + + No difference in performance. + + [24658307 35091571.34 110428560] tmp/wasmtime_main.so + [19204344 28347037.36 97184879] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/bz2/benchmark.wasm + + No difference in performance. + + [188748 273112.54 5585866] tmp/wasmtime_main.so + [186048 224155.80 456408] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-memmove/benchmark.wasm + + No difference in performance. + + [24383809 34604974.24 110344320] tmp/wasmtime_main.so + [19233251 28705933.78 97765704] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-xblabla20/benchmark.wasm + + No difference in performance. + + [24109811 35190977.32 111999528] tmp/wasmtime_main.so + [19003032 29830501.31 108302112] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm + + No difference in performance. + + [25825104 36284578.84 98705267] tmp/wasmtime_main.so + [20983716 31298895.22 81193032] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm + + No difference in performance. + + [10303488 19091732.71 95030243] tmp/wasmtime_main.so + [9972324 17266338.21 86076144] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/noop/benchmark.wasm + + No difference in performance. + + [94788 124173.74 283140] tmp/wasmtime_main.so + [85608 113814.01 316476] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm + + No difference in performance. + + [126900 173581.19 297072] tmp/wasmtime_main.so + [105984 159178.68 334008] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-matrix/benchmark.wasm + + No difference in performance. + + [151956 181208.16 367992] tmp/wasmtime_main.so + [142164 167950.09 523081] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-memmove/benchmark.wasm + + No difference in performance. + + [107568 151283.52 417996] tmp/wasmtime_main.so + [96552 140387.04 551844] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-keccak/benchmark.wasm + + No difference in performance. + + [104436 161983.80 325260] tmp/wasmtime_main.so + [90216 150572.50 284940] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-seqhash/benchmark.wasm + + No difference in performance. + + [133848 161197.20 268956] tmp/wasmtime_main.so + [122652 152241.84 404784] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-minicsv/benchmark.wasm + + No difference in performance. + + [130500 156812.06 297000] tmp/wasmtime_main.so + [120384 148217.39 255168] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-ed25519/benchmark.wasm + + No difference in performance. + + [145584 173084.40 341136] tmp/wasmtime_main.so + [135252 165701.52 326484] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/blake3-scalar/benchmark.wasm + + No difference in performance. + + [184392 212838.50 532908] tmp/wasmtime_main.so + [165528 203829.85 354708] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/hex-simd/benchmark.wasm + + No difference in performance. + + [209052 273107.88 423720] tmp/wasmtime_main.so + [187704 261877.34 427392] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-random/benchmark.wasm + + No difference in performance. + + [145620 177143.04 297648] tmp/wasmtime_main.so + [135648 169880.05 291132] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/blind-sig/benchmark.wasm + + No difference in performance. + + [192348 226756.79 469619] tmp/wasmtime_main.so + [182232 217715.76 380268] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-base64/benchmark.wasm + + No difference in performance. + + [149328 174356.64 295560] tmp/wasmtime_main.so + [138492 167680.81 302436] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/intgemm-simd/benchmark.wasm + + No difference in performance. + + [178056 207135.36 320040] tmp/wasmtime_main.so + [167760 200227.68 337680] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/noop/benchmark.wasm + + No difference in performance. + + [432 615.96 1008] tmp/wasmtime_main.so + [396 637.20 1116] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/meshoptimizer/benchmark.wasm + + No difference in performance. + + [179676 212660.64 351072] tmp/wasmtime_main.so + [167292 219323.16 435815] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-ratelimit/benchmark.wasm + + No difference in performance. + + [140976 172092.24 285480] tmp/wasmtime_main.so + [135180 167450.04 334512] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/spidermonkey/benchmark.wasm + + No difference in performance. + + [579672 632428.92 826056] tmp/wasmtime_main.so + [545976 645584.76 1196496] wasmtime/target/release/libwasmtime_bench_api.so + +compilation :: cycles :: benchmarks/noop/benchmark.wasm + + No difference in performance. + + [17553708 26606839.96 100542240] tmp/wasmtime_main.so + [18061812 26108957.05 84559104] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm + + No difference in performance. + + [237672 275609.15 440675] tmp/wasmtime_main.so + [215172 279336.61 616680] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-heapsort/benchmark.wasm + + No difference in performance. + + [130644 158666.76 299736] tmp/wasmtime_main.so + [121464 157300.20 329328] wasmtime/target/release/libwasmtime_bench_api.so + +instantiation :: cycles :: benchmarks/shootout-gimli/benchmark.wasm + + No difference in performance. + + [84456 123928.57 425556] tmp/wasmtime_main.so + [86580 122936.41 249768] wasmtime/target/release/libwasmtime_bench_api.so + +execution :: cycles :: benchmarks/shootout-nestedloop/benchmark.wasm + + No difference in performance. + + [468 739.08 1476] tmp/wasmtime_main.so + [468 736.56 2700] wasmtime/target/release/libwasmtime_bench_api.so \ No newline at end of file diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs index aa754c7..3fbd470 100644 --- a/fuzz/fuzz_targets/ion.rs +++ b/fuzz/fuzz_targets/ion.rs @@ -11,6 +11,6 @@ fuzz_target!(|func: Func| { let _ = env_logger::try_init(); log::trace!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); - let _out = - regalloc2::fuzzing::ion::run(&func, &env, false, false).expect("regalloc did not succeed"); + let _out = regalloc2::fuzzing::ion::run(&func, &env, false, false, true) + .expect("regalloc did not succeed"); }); diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs index c5aac16..ab92846 100644 --- a/fuzz/fuzz_targets/ion_checker.rs +++ b/fuzz/fuzz_targets/ion_checker.rs @@ -40,8 +40,8 @@ fuzz_target!(|testcase: TestCase| { let _ = env_logger::try_init(); log::trace!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); - let out = - regalloc2::fuzzing::ion::run(&func, &env, true, false).expect("regalloc did not succeed"); + let out = regalloc2::fuzzing::ion::run(&func, &env, true, false, true) + .expect("regalloc did not succeed"); let mut checker = Checker::new(&func, &env); checker.prepare(&out); diff --git a/fuzz/fuzz_targets/ssagen.rs b/fuzz/fuzz_targets/ssagen.rs index 4b7a0ae..59dd634 100644 --- a/fuzz/fuzz_targets/ssagen.rs +++ b/fuzz/fuzz_targets/ssagen.rs @@ -37,6 +37,6 @@ impl Arbitrary<'_> for TestCase { } fuzz_target!(|t: TestCase| { - let cfginfo = CFGInfo::new(&t.f).expect("could not create CFG info"); + let cfginfo = CFGInfo::new(&t.f, true).expect("could not create CFG info"); validate_ssa(&t.f, &cfginfo).expect("invalid SSA"); }); diff --git a/src/ion/fast_alloc.rs b/src/ion/fast_alloc.rs index e3c2dc0..1558148 100644 --- a/src/ion/fast_alloc.rs +++ b/src/ion/fast_alloc.rs @@ -1,8 +1,10 @@ -use alloc::vec; +use alloc::format; use alloc::vec::Vec; +use alloc::{string::String, vec}; use smallvec::{smallvec, SmallVec}; use std::{convert::TryFrom, println}; +use crate::InstPosition; use crate::{ cfg::CFGInfo, Allocation, Block, Edit, Function, Inst, MachineEnv, Operand, OperandConstraint, OperandKind, OperandPos, Output, PReg, PRegSet, ProgPoint, RegAllocError, RegClass, SpillSlot, @@ -26,17 +28,18 @@ struct PRegData { #[derive(Default, Clone, Copy)] struct BlockData { - pub allocated: bool, + pub reg_allocated: bool, + pub params_allocated: bool, } struct ReadOnlyData { - pub preorder: Vec, + pub postorder: Vec, pub reg_order_int: Vec, pub reg_order_float: Vec, } impl ReadOnlyData { - pub fn init(func: &F, mach_env: &MachineEnv) -> Self { + pub fn init(func: &F, mach_env: &MachineEnv, cfg: &CFGInfo) -> Self { let reg_order_int = { let class = RegClass::Int as usize; let amount = mach_env.preferred_regs_by_class[class].len() @@ -60,14 +63,14 @@ impl ReadOnlyData { Self { reg_order_int, reg_order_float, - preorder: Self::calc_preorder(func), + postorder: cfg.postorder.clone(), } } pub fn reg_order(&self, class: RegClass) -> &[PReg] { match class { RegClass::Int => &self.reg_order_int, - RegClass::Float => &self.reg_order_int, + RegClass::Float => &self.reg_order_float, } } @@ -173,6 +176,22 @@ impl<'a, F: Function> FastAllocState<'a, F> { let mut inst_alloc_offsets = Vec::with_capacity(func.num_insts()); inst_alloc_offsets.resize(func.num_insts(), 0); + // we need to create the alloc array beforehand because it needs to be sorted by inst index + // which we cannot guarantee when iterating through the blocks in reverse post-order + let allocs = { + let block_count = func.num_blocks(); + let mut cur_idx = 0; + for i in 0..block_count { + for inst in func.block_insns(Block::new(i)).iter() { + inst_alloc_offsets[inst.index()] = cur_idx as u32; + cur_idx += func.inst_operands(inst).len(); + } + } + let mut allocs = Vec::with_capacity(cur_idx); + allocs.resize(cur_idx, Allocation::none()); + allocs + }; + Self { vregs, pregs, @@ -186,7 +205,7 @@ impl<'a, F: Function> FastAllocState<'a, F> { stack_slot_count_float: u8::try_from(func.spillslot_size(RegClass::Float)) .expect("that's a big float"), - allocs: Vec::new(), + allocs, inst_alloc_offsets, edits: Vec::new(), safepoint_slots: Vec::new(), @@ -296,8 +315,15 @@ impl<'a, F: Function> FastAllocState<'a, F> { } pub fn assign_preg(&mut self, preg: PReg, vreg: VReg) { + // TODO: somewhere assign_preg is called without making sure the vreg is clear (or inspite of it) + // need to make sure this is intended behavior + self.clear_preg(preg); + self.pregs[preg.index()].vreg = Some(vreg.vreg() as u32); self.vregs[vreg.vreg()].preg = Some(preg); + if self.vregs[vreg.vreg()].reftype { + self.reftype_vregs_in_pregs_count += 1; + } } pub fn clear_preg(&mut self, preg: PReg) { @@ -356,18 +382,38 @@ pub fn run( } let mut state = FastAllocState::init(func, mach_env, &cfg); - let const_state = ReadOnlyData::init(func, mach_env); + let const_state = ReadOnlyData::init(func, mach_env, &cfg); - let len = const_state.preorder.len(); + state.blocks[func.entry_block().index()].params_allocated = true; + + let len = const_state.postorder.len(); for i in 0..len { - let block = const_state.preorder[i]; - // when handling branches later, we already have an input mapping for the block params - state.blocks[block.index()].allocated = true; + let block = const_state.postorder[len - 1 - i]; + if state.blocks[block.index()].reg_allocated { + trace!("Block {} already allocated. Skipping", i); + continue; + } + state.blocks[block.index()].reg_allocated = true; + + trace!("Allocating block {}", i); allocate_block_insts(&mut state, &const_state, block)?; handle_out_block_params(&mut state, &const_state, block)?; } + // we do not iterate the blocks in their index order so the order of edits might not be sorted by progpoint + // however it should be nearly sorted + state.edits.sort_by_key(|entry| entry.0); + + trace!("Edits:"); + for edit in &state.edits { + match edit.1 { + Edit::Move { from, to } => { + trace!("At {:?} from {} to {}", edit.0, from, to); + } + } + } + Ok(Output { num_spillslots: state.cur_stack_slot_idx as usize, edits: state.edits, @@ -385,15 +431,29 @@ fn allocate_block_insts<'a, F: Function>( block: Block, ) -> Result<(), RegAllocError> { for inst in state.func.block_insns(block).iter() { + let edit_start_idx = state.edits.len(); let clobbers = state.func.inst_clobbers(inst); let operands = state.func.inst_operands(inst); let req_refs_on_stack = state.func.requires_refs_on_stack(inst); + let alloc_idx = state.inst_alloc_offsets[inst.index()] as usize; - let alloc_idx = state.allocs.len(); - state.inst_alloc_offsets[inst.index()] = alloc_idx as u32; - state - .allocs - .resize(alloc_idx + operands.len(), Allocation::none()); + trace!( + "Allocating Inst {} (refs_on_stack: {}, is_ret: {}, is_branch: {}, alloc_idx: {})", + inst.index(), + req_refs_on_stack, + state.func.is_ret(inst), + state.func.is_branch(inst), + alloc_idx + ); + let mut str = String::new(); + for preg in clobbers { + if str.is_empty() { + str.push_str(&format!("{}", preg)); + } else { + str.push_str(&format!(", {}", preg)); + } + } + trace!("Clobbers: {}", str); // keep track of which pregs where allocated so we can clear them later on // TODO: wouldnt need this if we look up the inst a vreg was allocated at @@ -413,6 +473,7 @@ fn allocate_block_insts<'a, F: Function>( for vreg in state.reftype_vregs { let data = &state.vregs[vreg.vreg()]; if let Some(slot) = data.slot_idx { + trace!("Marking vreg {} as saved on stack at {}", vreg, slot); state .safepoint_slots .push((pos, Allocation::stack(SpillSlot::new(slot as usize)))); @@ -421,9 +482,32 @@ fn allocate_block_insts<'a, F: Function>( } // we allocate fixed defs/uses and stack allocations first + trace!("First alloc pass"); for (i, op) in operands.iter().enumerate() { let vreg = op.vreg(); + trace!("Operand {}: {}", i, op); + if vreg == VReg::invalid() { + // it seems cranelift emits fixed reg uses with invalid vregs, handle them here + // TODO: treat them like normal vregs by just using last_vreg_index+1 for them? + match op.constraint() { + OperandConstraint::FixedReg(reg) => { + state.clear_preg(reg); + regs_allocated.push(reg); + state.allocs[alloc_idx + i] = Allocation::reg(reg); + trace!("Chose {} for operand {}", reg, i); + late_write_disallow_regs.add(reg); + } + _ => { + panic!( + "Invalid op constraint {:?} for invalid vreg", + op.constraint() + ); + } + } + continue; + } + match op.constraint() { OperandConstraint::FixedReg(reg) => { match op.kind() { @@ -450,10 +534,12 @@ fn allocate_block_insts<'a, F: Function>( return Err(RegAllocError::TooManyLiveRegs); } + trace!("Operand {}'s allocation may not be used by a late def", i); // late uses cannot share a register with late defs late_write_disallow_regs.add(reg); } regs_allocated.push(reg); + trace!("Chose {} for operand {}", reg, i); } OperandKind::Def => { if op.pos() == OperandPos::Late { @@ -469,6 +555,7 @@ fn allocate_block_insts<'a, F: Function>( panic!("early def shares reg or is clobbered"); return Err(RegAllocError::TooManyLiveRegs); } + trace!("Operand {}'s allocation may not be used by a late def", i); // early defs cannot share a register with late defs late_write_disallow_regs.add(reg); } @@ -527,11 +614,11 @@ fn allocate_block_insts<'a, F: Function>( state.move_to_stack(tmp_reg, vreg, ProgPoint::after(inst)); regs_allocated.push(tmp_reg); } else { - println!("2"); state.alloc_stack_slot(vreg); state.move_to_stack(reg, vreg, ProgPoint::after(inst)); regs_allocated.push(reg); } + trace!("Chose {} for operand {}", reg, i); } } } @@ -542,14 +629,16 @@ fn allocate_block_insts<'a, F: Function>( if let Some(slot) = &state.vregs[vreg.vreg()].slot_idx { state.allocs[alloc_idx + i] = Allocation::stack(SpillSlot::new(*slot as usize)); + trace!("Chose slot {} for operand {}", slot, i); } else { return Err(RegAllocError::SSA(vreg, inst)); } } OperandKind::Def => { - state.allocs[alloc_idx + i] = Allocation::stack(SpillSlot::new( - state.alloc_stack_slot(vreg) as usize, - )); + let slot = state.alloc_stack_slot(vreg); + state.allocs[alloc_idx + i] = + Allocation::stack(SpillSlot::new(slot as usize)); + trace!("Chose slot {} for operand {}", slot, i); } } } @@ -558,12 +647,18 @@ fn allocate_block_insts<'a, F: Function>( } // alloc non-fixed uses and early defs in registers + trace!("Second alloc pass"); for (i, op) in operands.iter().enumerate() { if op.kind() == OperandKind::Def && op.pos() == OperandPos::Late { continue; } + trace!("Operand {}: {}", i, op); + let vreg = op.vreg(); + if vreg == VReg::invalid() { + continue; + } match op.constraint() { OperandConstraint::Reg => { @@ -592,9 +687,10 @@ fn allocate_block_insts<'a, F: Function>( // early def state.assign_preg(reg, vreg); state.alloc_stack_slot(vreg); - println!("3"); state.move_to_stack(reg, vreg, ProgPoint::after(inst)); } + + trace!("Chose {} for operand {}", reg, i); allocated = true; break; } @@ -603,6 +699,7 @@ fn allocate_block_insts<'a, F: Function>( continue; } + trace!("Ran out of registers for operand {}", i); // No register available // TODO: try to evict vreg that does not need to be in a preg panic!("Out of registers: {:?}", regs_allocated); @@ -616,12 +713,18 @@ fn allocate_block_insts<'a, F: Function>( } // alloc non-fixed late defs and reuse + trace!("Third alloc pass"); for (i, op) in operands.iter().enumerate() { if op.kind() != OperandKind::Def || op.pos() != OperandPos::Late { continue; } + trace!("Operand {}: {}", i, op); let vreg = op.vreg(); + if vreg == VReg::invalid() { + continue; + } + match op.constraint() { OperandConstraint::Reg => { // find first non-allocated register @@ -639,8 +742,8 @@ fn allocate_block_insts<'a, F: Function>( state.clear_preg(reg); state.assign_preg(reg, vreg); state.alloc_stack_slot(vreg); - println!("4"); state.move_to_stack(reg, vreg, ProgPoint::after(inst)); + trace!("Chose {} for operand {}", reg, i); allocated = true; break; } @@ -663,7 +766,6 @@ fn allocate_block_insts<'a, F: Function>( state.clear_preg(preg); state.assign_preg(preg, vreg); state.alloc_stack_slot(vreg); - println!("5"); state.move_to_stack(preg, vreg, ProgPoint::after(inst)); } _ => { @@ -674,8 +776,28 @@ fn allocate_block_insts<'a, F: Function>( // clear out all allocated regs for reg in regs_allocated { + trace!("Clearing {}", reg); state.clear_preg(reg); } + + // fixup edit order + let mut first_post_pos = None; + for i in edit_start_idx..state.edits.len() { + debug_assert!(state.edits[i].0.inst() == inst); + match first_post_pos { + None => { + if state.edits[i].0.pos() == InstPosition::After { + first_post_pos = Some(i); + } + } + Some(pos) => { + if state.edits[i].0.pos() == InstPosition::Before { + state.edits.swap(pos, i); + first_post_pos = Some(pos + 1); + } + } + } + } } Ok(()) @@ -686,13 +808,52 @@ fn handle_out_block_params<'a, F: Function>( const_state: &ReadOnlyData, block: Block, ) -> Result<(), RegAllocError> { + trace!("Allocating outgoing blockparams for {}", block.index()); let last_inst = state.func.block_insns(block).last(); if !state.func.is_branch(last_inst) { + trace!("Last inst {} is not a branch", last_inst.index()); return Ok(()); } + let mut pregs_used_by_br = PRegSet::empty(); + { + let alloc_start = state.inst_alloc_offsets[last_inst.index()] as usize; + let alloc_end = if last_inst.index() + 1 == state.inst_alloc_offsets.len() { + state.inst_alloc_offsets.len() + } else { + state.inst_alloc_offsets[last_inst.index() + 1] as usize + }; + for i in alloc_start..alloc_end { + if let Some(reg) = state.allocs[i].clone().as_reg() { + pregs_used_by_br.add(reg); + } + } + } + + // wouldn't need this if the edits for this were made before the moves for the branch inst but that has its own share of problems i think + let tmp_reg_int = 'block: { + for reg in const_state.reg_order(RegClass::Int) { + if !pregs_used_by_br.contains(*reg) { + break 'block *reg; + } + } + + panic!("No usable tmp_reg for block param handling"); + }; + + let tmp_reg_float = 'block: { + for reg in const_state.reg_order(RegClass::Float) { + if !pregs_used_by_br.contains(*reg) { + break 'block *reg; + } + } + + panic!("No usable tmp_reg for block param handling"); + }; + let succs = state.func.block_succs(block); - if succs.len() == 1 && state.blocks[succs[0].index()].allocated { + if succs.len() == 1 && state.blocks[succs[0].index()].params_allocated { + trace!("Only one allocated successor, moving allocations"); let succ = succs[0]; // move values to the already allocated places let in_params = state.func.block_params(succ); @@ -706,7 +867,11 @@ fn handle_out_block_params<'a, F: Function>( debug_assert!(state.vregs[in_vreg.vreg()].slot_idx.is_some()); debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some()); - let tmp_reg = const_state.reg_order(out_vreg.class())[0]; + let tmp_reg = if out_vreg.class() == RegClass::Int { + tmp_reg_int + } else { + tmp_reg_float + }; let out_slot = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); let in_slot = state.vregs[in_vreg.vreg()].slot_idx.unwrap(); @@ -726,12 +891,20 @@ fn handle_out_block_params<'a, F: Function>( )); } } else { + trace!("Successors not allocated. Creating allocation"); + + let mut allocs = SmallVec::<[(VReg, u32); 4]>::new(); // set incoming block params of successor to the current stack slot for (i, &succ) in state.func.block_succs(block).iter().enumerate() { - if state.blocks[succ.index()].allocated { + trace!("Creating block {}", succ.index()); + if state.blocks[succ.index()].params_allocated { return Err(RegAllocError::CritEdge(block, succ)); } + // we allocate the params here + // TODO: can there be a problem if the same successor occurs multiple times? + state.blocks[succ.index()].params_allocated = true; + let in_params = state.func.block_params(succ); let out_params = state.func.branch_blockparams(block, last_inst, i); debug_assert_eq!(in_params.len(), out_params.len()); @@ -743,29 +916,56 @@ fn handle_out_block_params<'a, F: Function>( debug_assert!(state.vregs[out_vreg.vreg()].slot_idx.is_some()); let out_slot_idx = state.vregs[out_vreg.vreg()].slot_idx.unwrap(); + // TODO: if out_vreg dies at this edge, we could reuse its stack slot + // TODO: we should also be able to reuse the slot if the successor only has one predecessor (us); check with AE + let mut no_alias = false; if !vregs_passed.contains(&out_vreg) { - state.vregs[in_vreg.vreg()].slot_idx = Some(out_slot_idx); + let mut alloced = false; + for alloc in &allocs { + if alloc.0 != out_vreg { + continue; + } + + // we can use the already moved into stack slot + state.vregs[in_vreg.vreg()].slot_idx = Some(alloc.1); + vregs_passed.push(out_vreg); + alloced = true; + break; + } + + if alloced { + continue; + } vregs_passed.push(out_vreg); + no_alias = true; + } + + // need to duplicate to avoid aliasing or create a new stack slot + // TODO: this creates multiple duplications for multiple blocks, can be avoided + let tmp_reg = if out_vreg.class() == RegClass::Int { + tmp_reg_int } else { - // need to duplicate to avoid aliasing - // TODO: this creates multiple duplications for multiple blocks, can be avoided - let tmp_reg = const_state.reg_order(out_vreg.class())[0]; - let slot = state.create_stack_slot(out_vreg.class()); - state.edits.push(( - ProgPoint::before(last_inst), - Edit::Move { - from: Allocation::stack(SpillSlot::new(out_slot_idx as usize)), - to: Allocation::reg(tmp_reg), - }, - )); - state.edits.push(( - ProgPoint::before(last_inst), - Edit::Move { - from: Allocation::reg(tmp_reg), - to: Allocation::stack(SpillSlot::new(slot as usize)), - }, - )); - state.vregs[in_vreg.vreg()].slot_idx = Some(slot); + tmp_reg_float + }; + let slot = state.create_stack_slot(out_vreg.class()); + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::stack(SpillSlot::new(out_slot_idx as usize)), + to: Allocation::reg(tmp_reg), + }, + )); + state.edits.push(( + ProgPoint::before(last_inst), + Edit::Move { + from: Allocation::reg(tmp_reg), + to: Allocation::stack(SpillSlot::new(slot as usize)), + }, + )); + state.vregs[in_vreg.vreg()].slot_idx = Some(slot); + + if no_alias { + allocs.push((out_vreg, slot)); } } }